Three Card Poker is a simple game in which the player and the dealer each receive 3 cards from a standard 52-card deck. Hand ranks from high to low are:
Of note, a straight is better than a flush in Three Card Poker. The player makes an Ante wager and then looks at their cards. The player may either make a Play wager equal to their Ante or may fold and lose only their Ante. If the player makes the Play bet, the dealer reveals their cards, with bets resolved as follows:
In addition, the player receives an “Ante Bonus” for holding a very strong hand, regardless of whether the dealer qualifies (Q32 or better) or even beats the player. Typically, the Ante bonus is 1 for Straight, 4 for Three-of-a-Kind, and 5 for Straight Flush
This file explores hands and outcomes in Three Card Poker. Tidyverse and several custom functions are used throughout:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
source("./Generic_Added_Utility_Functions_202105_v001.R")
All possible combinations of 52 cards are gathered in a matrix:
# All combinations of 3 cards from a 52-card deck
aHands <- combn(1:52, 3) %>% t()
str(aHands)
## int [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
# Suits (1-13 are 1, 14-26 are 2, 27-39 are 3, 40-52 are 4)
aSuits <- 1 + (aHands-1) %/% 13
str(aSuits)
## num [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
table(aSuits)
## aSuits
## 1 2 3 4
## 16575 16575 16575 16575
# Ranks (1/14/27/40 are A stored as 1, ... , 13/26/39/52 are K stored as 13)
aRanks <- 1 + (aHands-1) %% 13
str(aRanks)
## num [1:22100, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
table(aRanks)
## aRanks
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100 5100
The combinations of suits (and therefore flushes) are explored:
tblSuits <- as.data.frame(sapply(1:4,
FUN=function(x) apply(aSuits[,],
1,
FUN=function(y) sum(y==x)
)
)
) %>%
tibble::as_tibble() %>%
mutate(nMax=apply(., 1, FUN=function(x) max(x)),
isFlush=(nMax==3),
handNum=row_number()
) %>%
select(handNum, everything())
# Sample of tibble
tblSuits
## # A tibble: 22,100 × 7
## handNum V1 V2 V3 V4 nMax isFlush
## <int> <int> <int> <int> <int> <int> <lgl>
## 1 1 3 0 0 0 3 TRUE
## 2 2 3 0 0 0 3 TRUE
## 3 3 3 0 0 0 3 TRUE
## 4 4 3 0 0 0 3 TRUE
## 5 5 3 0 0 0 3 TRUE
## 6 6 3 0 0 0 3 TRUE
## 7 7 3 0 0 0 3 TRUE
## 8 8 3 0 0 0 3 TRUE
## 9 9 3 0 0 0 3 TRUE
## 10 10 3 0 0 0 3 TRUE
## # ℹ 22,090 more rows
# Counts of suit distributions
tblSuits %>%
count(nMax, isFlush, V1, V2, V3, V4)
## # A tibble: 20 × 7
## nMax isFlush V1 V2 V3 V4 n
## <int> <lgl> <int> <int> <int> <int> <int>
## 1 1 FALSE 0 1 1 1 2197
## 2 1 FALSE 1 0 1 1 2197
## 3 1 FALSE 1 1 0 1 2197
## 4 1 FALSE 1 1 1 0 2197
## 5 2 FALSE 0 0 1 2 1014
## 6 2 FALSE 0 0 2 1 1014
## 7 2 FALSE 0 1 0 2 1014
## 8 2 FALSE 0 1 2 0 1014
## 9 2 FALSE 0 2 0 1 1014
## 10 2 FALSE 0 2 1 0 1014
## 11 2 FALSE 1 0 0 2 1014
## 12 2 FALSE 1 0 2 0 1014
## 13 2 FALSE 1 2 0 0 1014
## 14 2 FALSE 2 0 0 1 1014
## 15 2 FALSE 2 0 1 0 1014
## 16 2 FALSE 2 1 0 0 1014
## 17 3 TRUE 0 0 0 3 286
## 18 3 TRUE 0 0 3 0 286
## 19 3 TRUE 0 3 0 0 286
## 20 3 TRUE 3 0 0 0 286
The combinations of ranks (and therefore trips and straights and pairs) is explored, and combined with the suits data to create overall hand types:
tblRanks <- as.data.frame(sapply(1:13,
FUN=function(x) apply(aRanks[,],
1,
FUN=function(y) sum(y==x)
)
)
) %>%
tibble::as_tibble() %>%
mutate(nMax=apply(., 1, FUN=function(x) max(x)),
isTrips=(nMax==3),
isPair=(nMax==2),
handNum=row_number()
) %>%
select(handNum, everything())
# Matrix of ranks that constitute a straight
mtxStrRanks <- matrix(data=0L, nrow=13, ncol=12)
for(idx in 1:11) mtxStrRanks[idx:(idx+2), idx] <- 1L
mtxStrRanks[c(1, 12, 13), 12] <- 1L
mtxStrRanks
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 1 0 0 0 0 0 0 0 0 0 0 1
## [2,] 1 1 0 0 0 0 0 0 0 0 0 0
## [3,] 1 1 1 0 0 0 0 0 0 0 0 0
## [4,] 0 1 1 1 0 0 0 0 0 0 0 0
## [5,] 0 0 1 1 1 0 0 0 0 0 0 0
## [6,] 0 0 0 1 1 1 0 0 0 0 0 0
## [7,] 0 0 0 0 1 1 1 0 0 0 0 0
## [8,] 0 0 0 0 0 1 1 1 0 0 0 0
## [9,] 0 0 0 0 0 0 1 1 1 0 0 0
## [10,] 0 0 0 0 0 0 0 1 1 1 0 0
## [11,] 0 0 0 0 0 0 0 0 1 1 1 0
## [12,] 0 0 0 0 0 0 0 0 0 1 1 1
## [13,] 0 0 0 0 0 0 0 0 0 0 1 1
# tblRanks converted to matrix of yes/no by rank
mtxBoolRanks <- tblRanks %>%
select(V1:V13) %>%
mutate(across(.cols=everything(), .fns=function(x) pmin(1L, x))) %>%
as.matrix()
str(mtxBoolRanks)
## int [1:22100, 1:13] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:13] "V1" "V2" "V3" "V4" ...
# Check for each type of straight
tblRanks <- tblRanks %>%
mutate(maxStr=mtxBoolRanks %*% mtxStrRanks %>% apply(., 1, FUN=max),
isStraight=(maxStr==3)
)
# Example of tibble
tblRanks
## # A tibble: 22,100 × 19
## handNum V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 1 1 1 1 0 0 0 0 0 0 0 0
## 2 2 1 1 0 1 0 0 0 0 0 0 0
## 3 3 1 1 0 0 1 0 0 0 0 0 0
## 4 4 1 1 0 0 0 1 0 0 0 0 0
## 5 5 1 1 0 0 0 0 1 0 0 0 0
## 6 6 1 1 0 0 0 0 0 1 0 0 0
## 7 7 1 1 0 0 0 0 0 0 1 0 0
## 8 8 1 1 0 0 0 0 0 0 0 1 0
## 9 9 1 1 0 0 0 0 0 0 0 0 1
## 10 10 1 1 0 0 0 0 0 0 0 0 0
## # ℹ 22,090 more rows
## # ℹ 7 more variables: V12 <int>, V13 <int>, nMax <int>, isTrips <lgl>,
## # isPair <lgl>, maxStr <dbl>, isStraight <lgl>
# Counts of hand types based on rank
tblRanks %>%
count(nMax, isTrips, isPair, maxStr, isStraight)
## # A tibble: 6 × 6
## nMax isTrips isPair maxStr isStraight n
## <int> <lgl> <lgl> <dbl> <lgl> <int>
## 1 1 FALSE FALSE 1 FALSE 4544
## 2 1 FALSE FALSE 2 FALSE 12992
## 3 1 FALSE FALSE 3 TRUE 768
## 4 2 FALSE TRUE 1 FALSE 2544
## 5 2 FALSE TRUE 2 FALSE 1200
## 6 3 TRUE FALSE 1 FALSE 52
# Counts by straight
tblRanks %>%
filter(isStraight) %>%
group_by(across(starts_with("V"))) %>%
summarize(n=n(), .groups="drop")
## # A tibble: 12 × 14
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 0 0 0 0 0 0 0 0 0 0 1 1 1
## 2 0 0 0 0 0 0 0 0 0 1 1 1 0
## 3 0 0 0 0 0 0 0 0 1 1 1 0 0
## 4 0 0 0 0 0 0 0 1 1 1 0 0 0
## 5 0 0 0 0 0 0 1 1 1 0 0 0 0
## 6 0 0 0 0 0 1 1 1 0 0 0 0 0
## 7 0 0 0 0 1 1 1 0 0 0 0 0 0
## 8 0 0 0 1 1 1 0 0 0 0 0 0 0
## 9 0 0 1 1 1 0 0 0 0 0 0 0 0
## 10 0 1 1 1 0 0 0 0 0 0 0 0 0
## 11 1 0 0 0 0 0 0 0 0 0 0 1 1
## 12 1 1 1 0 0 0 0 0 0 0 0 0 0
## # ℹ 1 more variable: n <int>
# Create handTypes
tblHandTypes <- tblRanks %>%
select(handNum, isTrips, isPair, isStraight) %>%
full_join(select(tblSuits, handNum, isFlush), by="handNum") %>%
mutate(handType=case_when(isStraight & isFlush ~ "01. SF",
isTrips ~ "02. Trips",
isStraight ~ "03. Straight",
isFlush ~ "04. Flush",
isPair ~ "05. Pair",
TRUE ~ "06. High Card"
)
)
tblHandTypes
## # A tibble: 22,100 × 6
## handNum isTrips isPair isStraight isFlush handType
## <int> <lgl> <lgl> <lgl> <lgl> <chr>
## 1 1 FALSE FALSE TRUE TRUE 01. SF
## 2 2 FALSE FALSE FALSE TRUE 04. Flush
## 3 3 FALSE FALSE FALSE TRUE 04. Flush
## 4 4 FALSE FALSE FALSE TRUE 04. Flush
## 5 5 FALSE FALSE FALSE TRUE 04. Flush
## 6 6 FALSE FALSE FALSE TRUE 04. Flush
## 7 7 FALSE FALSE FALSE TRUE 04. Flush
## 8 8 FALSE FALSE FALSE TRUE 04. Flush
## 9 9 FALSE FALSE FALSE TRUE 04. Flush
## 10 10 FALSE FALSE FALSE TRUE 04. Flush
## # ℹ 22,090 more rows
tblHandTypes %>%
count(handType, isTrips, isStraight, isFlush, isPair) %>%
arrange(n) %>%
mutate(pct=n/sum(n))
## # A tibble: 6 × 7
## handType isTrips isStraight isFlush isPair n pct
## <chr> <lgl> <lgl> <lgl> <lgl> <int> <dbl>
## 1 01. SF FALSE TRUE TRUE FALSE 48 0.00217
## 2 02. Trips TRUE FALSE FALSE FALSE 52 0.00235
## 3 03. Straight FALSE TRUE FALSE FALSE 720 0.0326
## 4 04. Flush FALSE FALSE TRUE FALSE 1096 0.0496
## 5 05. Pair FALSE FALSE FALSE TRUE 3744 0.169
## 6 06. High Card FALSE FALSE FALSE FALSE 16440 0.744
Hands by type are given a ranking:
# Ranks database
tblHandRanks <- tblHandTypes %>%
select(handNum, handType) %>%
full_join(select(tblRanks, handNum, V1:V13)) %>%
pivot_longer(cols=-c(handNum, handType)) %>%
filter(value>0) %>%
mutate(card=as.integer(str_replace(name, "V", "")),
card=ifelse(card==1, 14, card)
) %>%
arrange(handNum, desc(value), desc(card))
## Joining with `by = join_by(handNum)`
tblHandRanks
## # A tibble: 62,452 × 5
## handNum handType name value card
## <int> <chr> <chr> <int> <dbl>
## 1 1 01. SF V1 1 14
## 2 1 01. SF V3 1 3
## 3 1 01. SF V2 1 2
## 4 2 04. Flush V1 1 14
## 5 2 04. Flush V4 1 4
## 6 2 04. Flush V2 1 2
## 7 3 04. Flush V1 1 14
## 8 3 04. Flush V5 1 5
## 9 3 04. Flush V2 1 2
## 10 4 04. Flush V1 1 14
## # ℹ 62,442 more rows
# Hands without straights can be ranked directly
rnkNonStraight <- tblHandRanks %>%
group_by(handNum) %>%
mutate(rn=paste0("C", row_number())) %>%
filter(!(handType %in% c("01. SF", "03. Straight"))) %>%
pivot_wider(id_cols=c(handNum, handType),
names_from="rn",
values_from="card",
values_fill=0
) %>%
ungroup()
rnkNonStraight
## # A tibble: 21,332 × 5
## handNum handType C1 C2 C3
## <int> <chr> <dbl> <dbl> <dbl>
## 1 2 04. Flush 14 4 2
## 2 3 04. Flush 14 5 2
## 3 4 04. Flush 14 6 2
## 4 5 04. Flush 14 7 2
## 5 6 04. Flush 14 8 2
## 6 7 04. Flush 14 9 2
## 7 8 04. Flush 14 10 2
## 8 9 04. Flush 14 11 2
## 9 10 04. Flush 14 12 2
## 10 11 04. Flush 14 13 2
## # ℹ 21,322 more rows
# Ranks within type for non-straights
rnkNonStraight <- rnkNonStraight %>%
count(handType, C1, C2, C3, name="nSameRank") %>%
arrange(handType, desc(C1), desc(C2), desc(C3)) %>%
group_by(handType) %>%
mutate(rankOfType=row_number()) %>%
ungroup() %>%
inner_join(rnkNonStraight, by=c("handType", "C1", "C2", "C3"))
rnkNonStraight
## # A tibble: 21,332 × 7
## handType C1 C2 C3 nSameRank rankOfType handNum
## <chr> <dbl> <dbl> <dbl> <int> <int> <int>
## 1 02. Trips 14 0 0 4 1 547
## 2 02. Trips 14 0 0 4 1 560
## 3 02. Trips 14 0 0 4 1 963
## 4 02. Trips 14 0 0 4 1 13352
## 5 02. Trips 13 0 0 4 2 12623
## 6 02. Trips 13 0 0 4 2 12636
## 7 02. Trips 13 0 0 4 2 12883
## 8 02. Trips 13 0 0 4 2 19422
## 9 02. Trips 12 0 0 4 3 11855
## 10 02. Trips 12 0 0 4 3 11868
## # ℹ 21,322 more rows
# Ranks for hands with straights
rnkStraight <- tblHandRanks %>%
group_by(handNum) %>%
mutate(rn=paste0("C", row_number())) %>%
filter(handType %in% c("01. SF", "03. Straight")) %>%
pivot_wider(id_cols=c(handNum, handType),
names_from="rn",
values_from="card",
values_fill=0
) %>%
ungroup() %>%
mutate(C1=ifelse(C1==14 & C2==3, 1, C1))
rnkStraight
## # A tibble: 768 × 5
## handNum handType C1 C2 C3
## <int> <chr> <dbl> <dbl> <dbl>
## 1 1 01. SF 1 3 2
## 2 14 03. Straight 1 3 2
## 3 27 03. Straight 1 3 2
## 4 40 03. Straight 1 3 2
## 5 62 03. Straight 1 3 2
## 6 75 03. Straight 1 3 2
## 7 88 03. Straight 1 3 2
## 8 456 01. SF 14 13 12
## 9 469 03. Straight 14 13 12
## 10 482 03. Straight 14 13 12
## # ℹ 758 more rows
# Ranks within type for straights
rnkStraight <- rnkStraight %>%
count(handType, C1, C2, C3, name="nSameRank") %>%
arrange(handType, desc(C1), desc(C2), desc(C3)) %>%
group_by(handType) %>%
mutate(rankOfType=row_number()) %>%
ungroup() %>%
inner_join(rnkStraight, by=c("handType", "C1", "C2", "C3"))
rnkStraight
## # A tibble: 768 × 7
## handType C1 C2 C3 nSameRank rankOfType handNum
## <chr> <dbl> <dbl> <dbl> <int> <int> <int>
## 1 01. SF 14 13 12 4 1 456
## 2 01. SF 14 13 12 4 1 13287
## 3 01. SF 14 13 12 4 1 19696
## 4 01. SF 14 13 12 4 1 21880
## 5 01. SF 13 12 11 4 2 10621
## 6 01. SF 13 12 11 4 2 18447
## 7 01. SF 13 12 11 4 2 21541
## 8 01. SF 13 12 11 4 2 22100
## 9 01. SF 12 11 10 4 3 9760
## 10 01. SF 12 11 10 4 3 18041
## # ℹ 758 more rows
# Full hand ranks
rnkAllHands <- bind_rows(rnkStraight, rnkNonStraight) %>%
arrange(handType, rankOfType) %>%
mutate(chgType=handType!=lag(handType),
chgRank=rankOfType!=lag(rankOfType),
chgVal=ifelse(row_number()==1, 1, ifelse(chgType|chgRank, 1, 0)),
rankOverall=cumsum(chgVal)
)
rnkAllHands
## # A tibble: 22,100 × 11
## handType C1 C2 C3 nSameRank rankOfType handNum chgType chgRank
## <chr> <dbl> <dbl> <dbl> <int> <int> <int> <lgl> <lgl>
## 1 01. SF 14 13 12 4 1 456 NA NA
## 2 01. SF 14 13 12 4 1 13287 FALSE FALSE
## 3 01. SF 14 13 12 4 1 19696 FALSE FALSE
## 4 01. SF 14 13 12 4 1 21880 FALSE FALSE
## 5 01. SF 13 12 11 4 2 10621 FALSE TRUE
## 6 01. SF 13 12 11 4 2 18447 FALSE FALSE
## 7 01. SF 13 12 11 4 2 21541 FALSE FALSE
## 8 01. SF 13 12 11 4 2 22100 FALSE FALSE
## 9 01. SF 12 11 10 4 3 9760 FALSE TRUE
## 10 01. SF 12 11 10 4 3 18041 FALSE FALSE
## # ℹ 22,090 more rows
## # ℹ 2 more variables: chgVal <dbl>, rankOverall <dbl>
rnkAllHands %>% count(rankOverall, name="nRankOverall")
## # A tibble: 741 × 2
## rankOverall nRankOverall
## <dbl> <int>
## 1 1 4
## 2 2 4
## 3 3 4
## 4 4 4
## 5 5 4
## 6 6 4
## 7 7 4
## 8 8 4
## 9 9 4
## 10 10 4
## # ℹ 731 more rows
rnkAllHands %>%
count(rankOverall, handType, name="nRankOverall") %>%
count(handType, nRankOverall)
## # A tibble: 6 × 3
## handType nRankOverall n
## <chr> <int> <int>
## 1 01. SF 4 12
## 2 02. Trips 4 13
## 3 03. Straight 60 12
## 4 04. Flush 4 274
## 5 05. Pair 24 156
## 6 06. High Card 60 274
A list of hand numbers containing each card (1-52) is created, as well as a function to generate excluded hand numbers:
cardInHand <- lapply(1:52, FUN=function(x) which(rowSums(aHands==x)==1))
str(cardInHand)
## List of 52
## $ : int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
## $ : int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
## $ : int [1:1275] 1 51 52 53 54 55 56 57 58 59 ...
## $ : int [1:1275] 2 51 100 101 102 103 104 105 106 107 ...
## $ : int [1:1275] 3 52 100 148 149 150 151 152 153 154 ...
## $ : int [1:1275] 4 53 101 148 195 196 197 198 199 200 ...
## $ : int [1:1275] 5 54 102 149 195 241 242 243 244 245 ...
## $ : int [1:1275] 6 55 103 150 196 241 286 287 288 289 ...
## $ : int [1:1275] 7 56 104 151 197 242 286 330 331 332 ...
## $ : int [1:1275] 8 57 105 152 198 243 287 330 373 374 ...
## $ : int [1:1275] 9 58 106 153 199 244 288 331 373 415 ...
## $ : int [1:1275] 10 59 107 154 200 245 289 332 374 415 ...
## $ : int [1:1275] 11 60 108 155 201 246 290 333 375 416 ...
## $ : int [1:1275] 12 61 109 156 202 247 291 334 376 417 ...
## $ : int [1:1275] 13 62 110 157 203 248 292 335 377 418 ...
## $ : int [1:1275] 14 63 111 158 204 249 293 336 378 419 ...
## $ : int [1:1275] 15 64 112 159 205 250 294 337 379 420 ...
## $ : int [1:1275] 16 65 113 160 206 251 295 338 380 421 ...
## $ : int [1:1275] 17 66 114 161 207 252 296 339 381 422 ...
## $ : int [1:1275] 18 67 115 162 208 253 297 340 382 423 ...
## $ : int [1:1275] 19 68 116 163 209 254 298 341 383 424 ...
## $ : int [1:1275] 20 69 117 164 210 255 299 342 384 425 ...
## $ : int [1:1275] 21 70 118 165 211 256 300 343 385 426 ...
## $ : int [1:1275] 22 71 119 166 212 257 301 344 386 427 ...
## $ : int [1:1275] 23 72 120 167 213 258 302 345 387 428 ...
## $ : int [1:1275] 24 73 121 168 214 259 303 346 388 429 ...
## $ : int [1:1275] 25 74 122 169 215 260 304 347 389 430 ...
## $ : int [1:1275] 26 75 123 170 216 261 305 348 390 431 ...
## $ : int [1:1275] 27 76 124 171 217 262 306 349 391 432 ...
## $ : int [1:1275] 28 77 125 172 218 263 307 350 392 433 ...
## $ : int [1:1275] 29 78 126 173 219 264 308 351 393 434 ...
## $ : int [1:1275] 30 79 127 174 220 265 309 352 394 435 ...
## $ : int [1:1275] 31 80 128 175 221 266 310 353 395 436 ...
## $ : int [1:1275] 32 81 129 176 222 267 311 354 396 437 ...
## $ : int [1:1275] 33 82 130 177 223 268 312 355 397 438 ...
## $ : int [1:1275] 34 83 131 178 224 269 313 356 398 439 ...
## $ : int [1:1275] 35 84 132 179 225 270 314 357 399 440 ...
## $ : int [1:1275] 36 85 133 180 226 271 315 358 400 441 ...
## $ : int [1:1275] 37 86 134 181 227 272 316 359 401 442 ...
## $ : int [1:1275] 38 87 135 182 228 273 317 360 402 443 ...
## $ : int [1:1275] 39 88 136 183 229 274 318 361 403 444 ...
## $ : int [1:1275] 40 89 137 184 230 275 319 362 404 445 ...
## $ : int [1:1275] 41 90 138 185 231 276 320 363 405 446 ...
## $ : int [1:1275] 42 91 139 186 232 277 321 364 406 447 ...
## $ : int [1:1275] 43 92 140 187 233 278 322 365 407 448 ...
## $ : int [1:1275] 44 93 141 188 234 279 323 366 408 449 ...
## $ : int [1:1275] 45 94 142 189 235 280 324 367 409 450 ...
## $ : int [1:1275] 46 95 143 190 236 281 325 368 410 451 ...
## $ : int [1:1275] 47 96 144 191 237 282 326 369 411 452 ...
## $ : int [1:1275] 48 97 145 192 238 283 327 370 412 453 ...
## $ : int [1:1275] 49 98 146 193 239 284 328 371 413 454 ...
## $ : int [1:1275] 50 99 147 194 240 285 329 372 414 455 ...
exclHands <- function(exclCards, lstExclude=cardInHand) {
purrr::reduce(.x=lstExclude[c(exclCards)], .f=union)
}
# Example usages
str(exclHands(c(1)))
## int [1:1275] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1, 14, 27)))
## int [1:3676] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1:39)))
## int [1:21814] 1 2 3 4 5 6 7 8 9 10 ...
str(exclHands(c(1:49)))
## int [1:22099] 1 2 3 4 5 6 7 8 9 10 ...
# Confirm correct lengths of exclusion
choose(52,3) - choose(c(51, 49, 13, 3), 3)
## [1] 1275 3676 21814 22099
# Confirm distribution of remaining cards
# Note that 1e9 is a workaround to send a non-existent index to avoid mismatched formats
map_dfr(list(c(), c(1), c(1, 14, 27), c(1:39), c(1:49)),
.f=function(x) {
a <- if(length(x)>0) table(aHands[-exclHands(x),]) else table(aHands[-1e9,])
as.data.frame(a)
},
.id="src"
) %>%
tibble::as_tibble() %>%
mutate(src=c("1"="All",
"2"="Excl 1",
"3"="Excl 3",
"4"="Excl 39",
"5"="Excl 49"
)[src]) %>%
pivot_wider(id_cols="Var1", names_from="src", values_from="Freq", values_fill=0) %>%
rename(Card=Var1) %>%
print(n=52)
## # A tibble: 52 × 6
## Card All `Excl 1` `Excl 3` `Excl 39` `Excl 49`
## <fct> <int> <int> <int> <int> <int>
## 1 1 1275 0 0 0 0
## 2 2 1275 1225 1128 0 0
## 3 3 1275 1225 1128 0 0
## 4 4 1275 1225 1128 0 0
## 5 5 1275 1225 1128 0 0
## 6 6 1275 1225 1128 0 0
## 7 7 1275 1225 1128 0 0
## 8 8 1275 1225 1128 0 0
## 9 9 1275 1225 1128 0 0
## 10 10 1275 1225 1128 0 0
## 11 11 1275 1225 1128 0 0
## 12 12 1275 1225 1128 0 0
## 13 13 1275 1225 1128 0 0
## 14 14 1275 1225 0 0 0
## 15 15 1275 1225 1128 0 0
## 16 16 1275 1225 1128 0 0
## 17 17 1275 1225 1128 0 0
## 18 18 1275 1225 1128 0 0
## 19 19 1275 1225 1128 0 0
## 20 20 1275 1225 1128 0 0
## 21 21 1275 1225 1128 0 0
## 22 22 1275 1225 1128 0 0
## 23 23 1275 1225 1128 0 0
## 24 24 1275 1225 1128 0 0
## 25 25 1275 1225 1128 0 0
## 26 26 1275 1225 1128 0 0
## 27 27 1275 1225 0 0 0
## 28 28 1275 1225 1128 0 0
## 29 29 1275 1225 1128 0 0
## 30 30 1275 1225 1128 0 0
## 31 31 1275 1225 1128 0 0
## 32 32 1275 1225 1128 0 0
## 33 33 1275 1225 1128 0 0
## 34 34 1275 1225 1128 0 0
## 35 35 1275 1225 1128 0 0
## 36 36 1275 1225 1128 0 0
## 37 37 1275 1225 1128 0 0
## 38 38 1275 1225 1128 0 0
## 39 39 1275 1225 1128 0 0
## 40 40 1275 1225 1128 66 0
## 41 41 1275 1225 1128 66 0
## 42 42 1275 1225 1128 66 0
## 43 43 1275 1225 1128 66 0
## 44 44 1275 1225 1128 66 0
## 45 45 1275 1225 1128 66 0
## 46 46 1275 1225 1128 66 0
## 47 47 1275 1225 1128 66 0
## 48 48 1275 1225 1128 66 0
## 49 49 1275 1225 1128 66 0
## 50 50 1275 1225 1128 66 1
## 51 51 1275 1225 1128 66 1
## 52 52 1275 1225 1128 66 1
Possible outcomes for an example hand are explored:
# Include whether the hand qualifies
tblHands <- rnkAllHands %>%
mutate(qual=(handType!="06. High Card" | C1 >= 12))
tblHands %>% count(handType, qual) %>% mutate(pct=n/sum(n))
## # A tibble: 7 × 4
## handType qual n pct
## <chr> <lgl> <int> <dbl>
## 1 01. SF TRUE 48 0.00217
## 2 02. Trips TRUE 52 0.00235
## 3 03. Straight TRUE 720 0.0326
## 4 04. Flush TRUE 1096 0.0496
## 5 05. Pair TRUE 3744 0.169
## 6 06. High Card FALSE 6720 0.304
## 7 06. High Card TRUE 9720 0.440
# Pull an example hand number, get rank and exclusions
tmpHand <- 12345
aHands[tmpHand,]
## [1] 13 17 31
tblHands %>% filter(handNum==tmpHand)
## # A tibble: 1 × 12
## handType C1 C2 C3 nSameRank rankOfType handNum chgType chgRank chgVal
## <chr> <dbl> <dbl> <dbl> <int> <int> <int> <lgl> <lgl> <dbl>
## 1 06. Hig… 13 5 4 60 113 12345 FALSE FALSE 0
## # ℹ 2 more variables: rankOverall <dbl>, qual <lgl>
tmpRank <- tblHands %>% filter(handNum==tmpHand) %>% pull(rankOverall)
tmpRes <- tblHands %>%
select(handNum, rankOverall, qual) %>%
filter(!(handNum %in% exclHands(aHands[tmpHand,,drop=TRUE]))) %>%
mutate(res=case_when(rankOverall<tmpRank~"L", rankOverall>tmpRank~"W", TRUE~"T")) %>%
count(qual, res) %>%
mutate(pct=n/sum(n),
resPlay=case_when(!qual~1, res=="L"~-2, res=="W"~2, TRUE~0)
)
tmpRes
## # A tibble: 4 × 5
## qual res n pct resPlay
## <lgl> <chr> <int> <dbl> <dbl>
## 1 FALSE W 5754 0.312 1
## 2 TRUE L 10089 0.548 -2
## 3 TRUE T 26 0.00141 0
## 4 TRUE W 2555 0.139 2
cat("EV of playing:",
round(sum(tmpRes$pct*tmpRes$resPlay), 3),
"vs. EV of folding: -1.000\n"
)
## EV of playing: -0.506 vs. EV of folding: -1.000
The process is converted to functional form:
assessHand <- function(hn,
dbHands=tblHands,
mtxHands=aHands,
exclList=exclHands
) {
# FUNCTION ARGUMENTS:
# hn: hand number to evaluate
# dbHands: data frame containing map of hand number to overall rank and qualification
# mtxHands: matrix of hands to cards, sorted by hand number
# exclList: list of hands excluded by card
# Get rank of requested hand
keyRank <- dbHands %>% filter(handNum==hn) %>% pull(rankOverall)
# Get and return outcomes of requested hand
dbHands %>%
select(handNum, rankOverall, qual) %>%
filter(!(handNum %in% exclList(mtxHands[hn,,drop=TRUE]))) %>%
mutate(res=case_when(rankOverall<keyRank~"L",
rankOverall>keyRank~"W",
TRUE~"T"
)
) %>%
count(qual, res) %>%
mutate(pct=n/sum(n), handNum=hn)
}
assessHand(12345)
## # A tibble: 4 × 5
## qual res n pct handNum
## <lgl> <chr> <int> <dbl> <dbl>
## 1 FALSE W 5754 0.312 12345
## 2 TRUE L 10089 0.548 12345
## 3 TRUE T 26 0.00141 12345
## 4 TRUE W 2555 0.139 12345
map_dfr(.x=1:100, .f=function(x) assessHand(x))
## # A tibble: 400 × 5
## qual res n pct handNum
## <lgl> <chr> <int> <dbl> <int>
## 1 FALSE W 5706 0.310 1
## 2 TRUE L 41 0.00223 1
## 3 TRUE T 3 0.000163 1
## 4 TRUE W 12674 0.688 1
## 5 FALSE W 5721 0.311 2
## 6 TRUE L 896 0.0486 2
## 7 TRUE T 3 0.000163 2
## 8 TRUE W 11804 0.641 2
## 9 FALSE W 5724 0.311 3
## 10 TRUE L 886 0.0481 3
## # ℹ 390 more rows
Every hand is assessed, with process cached and results saved as RDS:
t <- proc.time()
allTCPHandData <- map_dfr(.x=1:nrow(tblHands), .f=function(x) assessHand(x))
allTCPHandData
## # A tibble: 88,164 × 5
## qual res n pct handNum
## <lgl> <chr> <int> <dbl> <int>
## 1 FALSE W 5706 0.310 1
## 2 TRUE L 41 0.00223 1
## 3 TRUE T 3 0.000163 1
## 4 TRUE W 12674 0.688 1
## 5 FALSE W 5721 0.311 2
## 6 TRUE L 896 0.0486 2
## 7 TRUE T 3 0.000163 2
## 8 TRUE W 11804 0.641 2
## 9 FALSE W 5724 0.311 3
## 10 TRUE L 886 0.0481 3
## # ℹ 88,154 more rows
proc.time() - t
## user system elapsed
## 861.93 46.34 922.68
saveToRDS(allTCPHandData)
Data are converted to a single line per hand:
# Counts of result by hand type
allTCPHandN <- allTCPHandData %>%
mutate(name=paste0("n", res, ifelse(qual, "Q", "N"))) %>%
pivot_wider(id_cols="handNum", names_from="name", values_from="n", values_fill=0)
allTCPHandN
## # A tibble: 22,100 × 7
## handNum nWN nLQ nTQ nWQ nLN nTN
## <int> <int> <int> <int> <int> <int> <int>
## 1 1 5706 41 3 12674 0 0
## 2 2 5721 896 3 11804 0 0
## 3 3 5724 886 3 11811 0 0
## 4 4 5724 877 3 11820 0 0
## 5 5 5724 865 3 11832 0 0
## 6 6 5724 850 3 11847 0 0
## 7 7 5724 832 3 11865 0 0
## 8 8 5709 811 3 11901 0 0
## 9 9 5694 787 3 11940 0 0
## 10 10 6195 764 3 11462 0 0
## # ℹ 22,090 more rows
# Percentages of result by hand type
allTCPHandPct <- allTCPHandData %>%
mutate(name=paste0("p", res, ifelse(qual, "Q", "N"))) %>%
pivot_wider(id_cols="handNum", names_from="name", values_from="pct", values_fill=0)
allTCPHandPct
## # A tibble: 22,100 × 7
## handNum pWN pLQ pTQ pWQ pLN pTN
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.310 0.00223 0.000163 0.688 0 0
## 2 2 0.311 0.0486 0.000163 0.641 0 0
## 3 3 0.311 0.0481 0.000163 0.641 0 0
## 4 4 0.311 0.0476 0.000163 0.642 0 0
## 5 5 0.311 0.0469 0.000163 0.642 0 0
## 6 6 0.311 0.0461 0.000163 0.643 0 0
## 7 7 0.311 0.0452 0.000163 0.644 0 0
## 8 8 0.310 0.0440 0.000163 0.646 0 0
## 9 9 0.309 0.0427 0.000163 0.648 0 0
## 10 10 0.336 0.0415 0.000163 0.622 0 0
## # ℹ 22,090 more rows
# Identical results by hand type
eqResults <- allTCPHandN %>%
arrange(nWN, nLQ, nTQ, nWQ, nLN, nTN) %>%
mutate(oa=apply(select(., -handNum), 1, FUN=function(x) paste0(x, collapse="-")),
isNew=ifelse(row_number()==1, 1, ifelse(oa!=lag(oa), 1, 0)),
typeNum=cumsum(isNew)
)
eqResults
## # A tibble: 22,100 × 10
## handNum nWN nLQ nTQ nWQ nLN nTN oa isNew typeNum
## <int> <int> <int> <int> <int> <int> <int> <chr> <dbl> <dbl>
## 1 1849 0 13147 0 0 5251 26 0-13147-0-0-5251-26 1 1
## 2 1862 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 3 1916 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 4 1929 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 5 2239 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 6 2280 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 7 2989 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 8 3002 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 9 3091 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## 10 3104 0 13147 0 0 5251 26 0-13147-0-0-5251-26 0 1
## # ℹ 22,090 more rows
summary(eqResults)
## handNum nWN nLQ nTQ nWQ
## Min. : 1 Min. : 0 Min. : 0 Min. : 0.00 Min. : 0
## 1st Qu.: 5526 1st Qu.:4481 1st Qu.: 4737 1st Qu.: 0.00 1st Qu.: 0
## Median :11050 Median :5717 Median : 9119 Median : 3.00 Median : 3565
## Mean :11050 Mean :4794 Mean : 8401 Mean :12.66 Mean : 4408
## 3rd Qu.:16575 3rd Qu.:5748 3rd Qu.:13114 3rd Qu.:25.00 3rd Qu.: 8299
## Max. :22100 Max. :6720 Max. :13181 Max. :26.00 Max. :13228
## nLN nTN oa isNew
## Min. : 0.0 Min. : 0.000 Length:22100 Min. :0.00000
## 1st Qu.: 0.0 1st Qu.: 0.000 Class :character 1st Qu.:0.00000
## Median : 0.0 Median : 0.000 Mode :character Median :0.00000
## Mean : 800.7 Mean : 7.723 Mean :0.07475
## 3rd Qu.: 756.0 3rd Qu.:25.000 3rd Qu.:0.00000
## Max. :5251.0 Max. :26.000 Max. :1.00000
## typeNum
## Min. : 1
## 1st Qu.: 333
## Median : 830
## Mean : 815
## 3rd Qu.:1261
## Max. :1652
There are 1,652 unique combinations of outcomes across the 22,100 starting hands
The optimal decision is included, based on hand percentages:
allTCPHandAction <- allTCPHandPct %>%
mutate(evPlay=(pWN+pTN+pLN)+2*(pWQ-pLQ),
action=ifelse(evPlay>=-1, "Play", "Fold"),
evOptimal=ifelse(action=="Fold", -1, evPlay)
) %>%
select(handNum, evPlay, action, evOptimal)
allTCPHandAction
## # A tibble: 22,100 × 4
## handNum evPlay action evOptimal
## <int> <dbl> <chr> <dbl>
## 1 1 1.68 Play 1.68
## 2 2 1.49 Play 1.49
## 3 3 1.50 Play 1.50
## 4 4 1.50 Play 1.50
## 5 5 1.50 Play 1.50
## 6 6 1.50 Play 1.50
## 7 7 1.51 Play 1.51
## 8 8 1.51 Play 1.51
## 9 9 1.52 Play 1.52
## 10 10 1.50 Play 1.50
## # ℹ 22,090 more rows
allTCPHandAction %>%
summarize(across(starts_with("ev"), mean))
## # A tibble: 1 × 2
## evPlay evOptimal
## <dbl> <dbl>
## 1 -0.129 -0.0866
allTCPHandAction %>%
group_by(action) %>%
summarize(n=n(), across(starts_with("ev"), mean))
## # A tibble: 2 × 4
## action n evPlay evOptimal
## <chr> <int> <dbl> <dbl>
## 1 Fold 7200 -1.13 -1
## 2 Play 14900 0.355 0.355
allTCPHandAction %>%
full_join(select(tblHands, handNum, handType), by="handNum") %>%
group_by(handType, action) %>%
summarize(n=n(), across(starts_with("ev"), mean), .groups="drop")
## # A tibble: 7 × 5
## handType action n evPlay evOptimal
## <chr> <chr> <int> <dbl> <dbl>
## 1 01. SF Play 48 1.69 1.69
## 2 02. Trips Play 52 1.68 1.68
## 3 03. Straight Play 720 1.61 1.61
## 4 04. Flush Play 1096 1.45 1.45
## 5 05. Pair Play 3744 1.01 1.01
## 6 06. High Card Fold 7200 -1.13 -1
## 7 06. High Card Play 9240 -0.152 -0.152
The decision function for hands with only a high card is plotted:
allTCPHandAction %>%
full_join(select(tblHands, handNum, handType, C1, C2), by="handNum") %>%
filter(handType=="06. High Card") %>%
group_by(C1, C2) %>%
summarize(pctPlay=mean(action=="Play"),
evPlay=mean(evPlay),
n=n(),
.groups="drop"
) %>%
ggplot(aes(y=factor(C1, levels=5:14), x=factor(C2, levels=13:3))) +
geom_tile(aes(fill=pctPlay)) +
geom_text(aes(label=paste0(round(100*pctPlay, 1), "%\n(", round(evPlay, 3), ")")),
size=2.5
) +
scale_fill_continuous("% Play", low="white", high="lightgreen") +
labs(title="Percentage of hands played optimally (high-card only hands)",
subtitle="Cell contents are % played (mean EV of playing)",
y="Highest card (14=A, 13=K, 12=Q, 11=J)",
x="Second-highest card (13=K, 12=Q, 11=J)"
)
The EV for hands with a pair is plotted:
allTCPHandAction %>%
full_join(select(tblHands, handNum, handType, C1, C2), by="handNum") %>%
filter(handType=="05. Pair") %>%
group_by(C1, C2) %>%
summarize(pctPlay=mean(action=="Play"),
evPlay=mean(evPlay),
n=n(),
.groups="drop"
) %>%
ggplot(aes(y=factor(C1, levels=2:14), x=factor(C2, levels=14:2))) +
geom_tile(aes(fill=evPlay)) +
geom_text(aes(label=paste0(round(evPlay, 3))),
size=2.5
) +
scale_fill_continuous("EV Play", low="white", high="lightgreen", limits=c(0, 2)) +
labs(title="EV of pair hands (pair hands always played)",
subtitle="Cell contents are mean EV of playing",
y="Paired card (14=A, 13=K, 12=Q, 11=J)",
x="Kicker (14=A, 13=K, 12=Q, 11=J)"
)
Due to the unlikelihood of a kicker being needed when comparing a player and dealer pair, there are some non-monotonic features to EV. In general, a better kicker is less likely to block dealer qualifying (not A/K/Q) but more likely to block higher dealer pairs. For example, with a pair of 4s, J kicker has EV +0.81 while Q kicker has EV +0.79, and 5 kicker has EV +0.80 while 3 kicker has EV +0.77
The impact of suits on EV is explored:
allTCPHandAction %>%
full_join(select(tblHands, handNum, handType, C1, C2, C3), by="handNum") %>%
full_join(select(tblSuits, handNum, nMax), by="handNum") %>%
filter(handType=="06. High Card") %>%
group_by(C1, C2, nMax) %>%
summarize(n=n(),
evMean=mean(evPlay),
evMax=max(evPlay),
evMin=min(evPlay),
.groups="drop"
) %>%
arrange(C1, C2, nMax) %>%
group_by(C1, C2) %>%
summarize(delta_21=last(evMean)-first(evMean), .groups="drop") %>%
ggplot(aes(y=factor(C1, levels=5:14), x=factor(C2, levels=13:3))) +
geom_tile(aes(fill=delta_21)) +
geom_text(aes(label=paste0("", round(delta_21, 4), "")),
size=2.5
) +
scale_fill_continuous("Delta EV", high="white", low="pink") +
labs(title="Mean difference in EV for suits 2-1-0 vs. 1-1-1 (high card only)",
subtitle="Negative reflects 2-1-0 having lower mean EV than 1-1-1",
y="Highest card (14=A, 13=K, 12=Q, 11=J)",
x="Second-highest card (13=K, 12=Q, 11=J)"
)
Suits play a very small role in mean EV for given high-card hand ranks, with impact ranging from ~0.0020 better for AKx rainbow to ~0.0005 better for Q32 rainbow
Function assessHand() is re-written to allow for specifying cards to be excluded:
oldAssess <- assessHand
assessHand <- function(hn=NULL,
keyRank=NULL,
dbHands=tblHands,
mtxHands=aHands,
exclCards=NULL,
exclList=exclHands
) {
# FUNCTION ARGUMENTS:
# hn: hand number to evaluate (if NULL, need to pass keyRank and exclCards instead)
# keyRank: hand rank for use in dbHands (if NULL, calculated from hn)
# dbHands: data frame containing map of hand number to overall rank and qualification
# mtxHands: matrix of hands to cards, sorted by hand number
# exclCards: cards not available for other hands (if NULL, the cards in hn)
# exclList: list of hands excluded by card
# Get rank of requested hand
if(is.null(keyRank)) {
if(is.null(hn)) stop("\nMust pass either hn or keyRank\n")
keyRank <- dbHands %>% filter(handNum==hn) %>% pull(rankOverall)
}
# Get cards to be excluded
if(is.null(exclCards)) {
if(is.null(hn)) stop("\nMust pass either hn or exclCards\n")
exclCards <- mtxHands[hn,,drop=TRUE]
}
# Get and return outcomes of requested hand
dbHands %>%
select(handNum, rankOverall, qual) %>%
filter(!(handNum %in% exclList(exclCards))) %>%
mutate(res=case_when(rankOverall<keyRank~"L",
rankOverall>keyRank~"W",
TRUE~"T"
)
) %>%
count(qual, res) %>%
mutate(pct=n/sum(n),
handNum=ifelse(is.null(hn), NA_integer_, as.integer(hn)),
keyRank=keyRank
)
}
# hn=123 has cards 1, 4, 28 (A42 with A-4 suited) and is of rank 531
oldAssess(hn=123)
## # A tibble: 4 × 5
## qual res n pct handNum
## <lgl> <chr> <int> <dbl> <dbl>
## 1 FALSE W 5728 0.311 123
## 2 TRUE L 7353 0.399 123
## 3 TRUE T 25 0.00136 123
## 4 TRUE W 5318 0.289 123
assessHand(hn=123)
## # A tibble: 4 × 6
## qual res n pct handNum keyRank
## <lgl> <chr> <int> <dbl> <int> <dbl>
## 1 FALSE W 5728 0.311 123 531
## 2 TRUE L 7353 0.399 123 531
## 3 TRUE T 25 0.00136 123 531
## 4 TRUE W 5318 0.289 123 531
assessHand(keyRank=531, exclCards=c(1, 4, 28))
## # A tibble: 4 × 6
## qual res n pct handNum keyRank
## <lgl> <chr> <int> <dbl> <int> <dbl>
## 1 FALSE W 5728 0.311 NA 531
## 2 TRUE L 7353 0.399 NA 531
## 3 TRUE T 25 0.00136 NA 531
## 4 TRUE W 5318 0.289 NA 531
assessHand(keyRank=531, exclCards=c(1, 4, 28, 38)) # exclude an extra offsuit Q
## # A tibble: 4 × 6
## qual res n pct handNum keyRank
## <lgl> <chr> <int> <dbl> <int> <dbl>
## 1 FALSE W 5728 0.331 NA 531
## 2 TRUE L 6951 0.402 NA 531
## 3 TRUE T 25 0.00145 NA 531
## 4 TRUE W 4592 0.265 NA 531
The lowest playing hand is explored for the impact of each additional card excluded:
tmpHand <- allTCPHandAction %>%
filter(action=="Play") %>%
arrange(evPlay) %>%
slice(1) %>%
pull(handNum)
tmpRank <- tblHands %>%
filter(handNum==tmpHand) %>%
pull(rankOverall)
tmpCards <- aHands[tmpHand,,drop=TRUE]
cat("\nHand number:",
tmpHand,
"with cards [",
paste0(tmpCards, sep=""),
"] and rank",
tmpRank,
"will be explored\n"
)
##
## Hand number: 3742 with cards [ 4 6 25 ] and rank 621 will be explored
assessHand(hn=tmpHand)
## # A tibble: 4 × 6
## qual res n pct handNum keyRank
## <lgl> <chr> <int> <dbl> <int> <dbl>
## 1 FALSE W 5751 0.312 3742 621
## 2 TRUE L 12343 0.670 3742 621
## 3 TRUE T 25 0.00136 3742 621
## 4 TRUE W 305 0.0166 3742 621
assessHand(keyRank=tmpRank, exclCards=tmpCards)
## # A tibble: 4 × 6
## qual res n pct handNum keyRank
## <lgl> <chr> <int> <dbl> <int> <dbl>
## 1 FALSE W 5751 0.312 NA 621
## 2 TRUE L 12343 0.670 NA 621
## 3 TRUE T 25 0.00136 NA 621
## 4 TRUE W 305 0.0166 NA 621
tmpWL <- map_dfr(.x=setdiff(1:52, tmpCards),
.f=function(x) {
assessHand(keyRank=tmpRank, exclCards=c(tmpCards, x)) %>%
mutate(extraCard=x)
}
)
tmpWL
## # A tibble: 196 × 7
## qual res n pct handNum keyRank extraCard
## <lgl> <chr> <int> <dbl> <int> <dbl> <int>
## 1 FALSE W 5751 0.333 NA 621 1
## 2 TRUE L 11215 0.648 NA 621 1
## 3 TRUE T 25 0.00145 NA 621 1
## 4 TRUE W 305 0.0176 NA 621 1
## 5 FALSE W 5271 0.305 NA 621 2
## 6 TRUE L 11735 0.678 NA 621 2
## 7 TRUE T 25 0.00145 NA 621 2
## 8 TRUE W 265 0.0153 NA 621 2
## 9 FALSE W 5283 0.305 NA 621 3
## 10 TRUE L 11723 0.678 NA 621 3
## # ℹ 186 more rows
tmpWL %>%
mutate(outc=case_when(!qual ~ 1, res=="L" ~ -2, res=="W" ~ 2, res=="T" ~ 0)) %>%
group_by(extraCard) %>%
summarize(ev=sum(outc*n)/sum(n), pQual=sum(qual*n)/sum(n)) %>%
mutate(ecSuit=1+((extraCard-1) %/% 13),
ecRank=1+((extraCard-1) %% 13),
ecRank=ifelse(ecRank==1, 14, ecRank)
) %>%
ggplot(aes(x=factor(ecRank, levels=14:2), y=ev)) +
geom_text(aes(y=ev+0.005*ifelse(ev>-1, -1, 1),
label=round(ev, 3),
hjust=ifelse(ev>-1, 1, 0)
),
size=2.5
) +
geom_point() +
coord_flip() +
facet_wrap(~ecSuit, nrow=1) +
geom_hline(yintercept=-1, lty=2) +
labs(y="EV",
x="Card Rank",
title=paste0("EV of playing ",
paste0(tmpCards, collapse="-"),
" if one extra card excluded"
),
subtitle="Facetted by suit"
)
Data are integrated to include a full picture of possible outcomes, for use in sampling:
# Table of ante bonus and pair-plus bonus
tblBonus <- tibble::tibble(handType=sort(unique(tblHandTypes$handType)),
ppBonus=c(40, 30, 6, 3, 1, -1),
anteBonus=c(5, 4, 1, 0, 0, 0)
)
tblBonus
## # A tibble: 6 × 3
## handType ppBonus anteBonus
## <chr> <dbl> <dbl>
## 1 01. SF 40 5
## 2 02. Trips 30 4
## 3 03. Straight 6 1
## 4 04. Flush 3 0
## 5 05. Pair 1 0
## 6 06. High Card -1 0
# Add hand type, optimal action, and pivot longer
# Add qualification, win/loss, main payout, ante bonus, pair-plus bonus
allTCPOutcome <- allTCPHandN %>%
full_join(select(tblHandTypes, handNum, handType), by="handNum") %>%
full_join(select(allTCPHandAction, handNum, action), by="handNum") %>%
pivot_longer(cols=-c(handNum, handType, action), values_to="n") %>%
mutate(dealerQual=str_sub(name, 3, 3),
playerRaw=str_sub(name, 2, 2),
antePay=case_when(action=="Fold"~-1,
dealerQual=="N"~1,
playerRaw=="W"~1,
playerRaw=="T"~0,
playerRaw=="L"~-1
),
playPay=case_when(action=="Fold"~0,
dealerQual=="N"~0,
playerRaw=="W"~1,
playerRaw=="T"~0,
playerRaw=="L"~-1
)
) %>%
left_join(tblBonus, by="handType")
allTCPOutcome
## # A tibble: 132,600 × 11
## handNum handType action name n dealerQual playerRaw antePay playPay
## <int> <chr> <chr> <chr> <int> <chr> <chr> <dbl> <dbl>
## 1 1 01. SF Play nWN 5706 N W 1 0
## 2 1 01. SF Play nLQ 41 Q L -1 -1
## 3 1 01. SF Play nTQ 3 Q T 0 0
## 4 1 01. SF Play nWQ 12674 Q W 1 1
## 5 1 01. SF Play nLN 0 N L 1 0
## 6 1 01. SF Play nTN 0 N T 1 0
## 7 2 04. Flush Play nWN 5721 N W 1 0
## 8 2 04. Flush Play nLQ 896 Q L -1 -1
## 9 2 04. Flush Play nTQ 3 Q T 0 0
## 10 2 04. Flush Play nWQ 11804 Q W 1 1
## # ℹ 132,590 more rows
## # ℹ 2 more variables: ppBonus <dbl>, anteBonus <dbl>
# Confirm expected values
tmpSum <- allTCPOutcome %>%
select(-handNum) %>%
summarize(across(where(is.numeric), .fns=function(x) sum(x*n)/sum(n)))
tmpSum %>% select(-n)
## # A tibble: 1 × 4
## antePay playPay ppBonus anteBonus
## <dbl> <dbl> <dbl> <dbl>
## 1 -0.101 0.0146 -0.0728 0.0529
tmpSum %>% select(-n, -ppBonus) %>% sum()
## [1] -0.03372981
Expected values are consistent with the summary provided by Wizard of Odds website:
Hands are simulated, with summary statistics calculated:
nSims <- 10000
nPer <- 100
set.seed(23110314)
# Example simulation
allTCPSim <- allTCPOutcome %>%
sample_n(size=nSims*nPer, replace=TRUE, weight=n) %>%
mutate(simNumber=rep(1:nSims, each=nPer))
allTCPSim
## # A tibble: 1,000,000 × 12
## handNum handType action name n dealerQual playerRaw antePay playPay
## <int> <chr> <chr> <chr> <int> <chr> <chr> <dbl> <dbl>
## 1 11774 06. High Card Play nLQ 11787 Q L -1 -1
## 2 9505 06. High Card Play nLQ 11738 Q L -1 -1
## 3 17471 06. High Card Play nWN 5743 N W 1 0
## 4 13592 05. Pair Play nWQ 10555 Q W 1 1
## 5 20309 06. High Card Fold nLN 4116 N L -1 0
## 6 14505 06. High Card Fold nLN 3569 N L -1 0
## 7 18505 06. High Card Play nLQ 5670 Q L -1 -1
## 8 2107 05. Pair Play nWQ 10767 Q W 1 1
## 9 1074 06. High Card Play nWQ 5521 Q W 1 1
## 10 16537 05. Pair Play nLQ 2414 Q L -1 -1
## # ℹ 999,990 more rows
## # ℹ 3 more variables: ppBonus <dbl>, anteBonus <dbl>, simNumber <int>
# Example summary of outcomes
allTCPSim %>%
group_by(simNumber) %>%
summarize(n=n(), across(c(antePay, playPay, ppBonus, anteBonus), sum)) %>%
mutate(mainPay=antePay+playPay+anteBonus) %>%
select(-n, -simNumber) %>%
summary()
## antePay playPay ppBonus anteBonus
## Min. :-46.00 Min. :-26.000 Min. :-76.000 Min. : 0.000
## 1st Qu.:-16.00 1st Qu.: -3.000 1st Qu.:-28.000 1st Qu.: 3.000
## Median :-10.00 Median : 2.000 Median :-12.000 Median : 5.000
## Mean :-10.03 Mean : 1.462 Mean : -7.122 Mean : 5.287
## 3rd Qu.: -4.00 3rd Qu.: 6.000 3rd Qu.: 10.000 3rd Qu.: 7.000
## Max. : 30.00 Max. : 28.000 Max. :164.000 Max. :26.000
## mainPay
## Min. :-69.000
## 1st Qu.:-14.000
## Median : -3.000
## Mean : -3.284
## 3rd Qu.: 8.000
## Max. : 66.000
Simulation means are very close to theoretical game EV
Results by simulation are assessed assuming a 2:1 ratio of Ante-Bonus:
allTCPSim %>%
mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
group_by(simNumber) %>%
mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
summarize(minOverall=min(ovCum),
sumOverall=sum(overall),
minMain=min(ovMain),
sumMain=sum(mainBet),
minBonus=min(ovBonus),
sumBonus=sum(ppBonus),
pQ=mean(dealerQual=="Q")
) %>%
summary()
## simNumber minOverall sumOverall minMain
## Min. : 1 Min. :-192.00 Min. :-187.00 Min. :-70.00
## 1st Qu.: 2501 1st Qu.: -66.00 1st Qu.: -51.00 1st Qu.:-20.00
## Median : 5000 Median : -41.00 Median : -18.00 Median :-12.00
## Mean : 5000 Mean : -45.35 Mean : -13.69 Mean :-13.96
## 3rd Qu.: 7500 3rd Qu.: -20.00 3rd Qu.: 20.00 3rd Qu.: -6.00
## Max. :10000 Max. : 42.00 Max. : 232.00 Max. : 6.00
## sumMain minBonus sumBonus pQ
## Min. :-69.000 Min. :-76.00 Min. :-76.000 Min. :0.5200
## 1st Qu.:-14.000 1st Qu.:-33.00 1st Qu.:-28.000 1st Qu.:0.6600
## Median : -3.000 Median :-22.00 Median :-12.000 Median :0.7000
## Mean : -3.284 Mean :-22.95 Mean : -7.122 Mean :0.6955
## 3rd Qu.: 8.000 3rd Qu.:-12.00 3rd Qu.: 10.000 3rd Qu.:0.7300
## Max. : 66.000 Max. : 40.00 Max. :164.000 Max. :0.8700
Theoretical means and standard errors are calculated for comparison:
sdCols <- c("qual", "fold",
"win", "tie", "lose",
"antePay", "playPay", "anteBonus", "mainOutcome",
"ppBonus", "outcome2_1"
)
allTCPOutcome %>%
mutate(mainOutcome=(antePay+playPay+anteBonus),
outcome2_1=2*mainOutcome+ppBonus,
qual=dealerQual=="Q",
win=playerRaw=="W",
tie=playerRaw=="T",
lose=playerRaw=="L",
fold=action=="Fold"
) %>%
pivot_longer(cols=all_of(sdCols), names_to="metric", values_to="metricVal") %>%
group_by(metric) %>%
summarize(mu=sum(metricVal*n)/sum(n),
variance=sum(n*metricVal**2)/sum(n)-mu**2,
sd=sqrt(variance),
se100=sd/10
) %>%
mutate(ord=match(metric, sdCols)) %>%
arrange(ord) %>%
select(-ord)
## # A tibble: 11 × 5
## metric mu variance sd se100
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 qual 0.696 0.212 0.460 0.0460
## 2 fold 0.326 0.220 0.469 0.0469
## 3 win 0.499 0.250 0.500 0.0500
## 4 tie 0.00111 0.00111 0.0332 0.00332
## 5 lose 0.499 0.250 0.500 0.0500
## 6 antePay -0.101 0.989 0.995 0.0995
## 7 playPay 0.0146 0.463 0.681 0.0681
## 8 anteBonus 0.0529 0.122 0.349 0.0349
## 9 mainOutcome -0.0337 2.69 1.64 0.164
## 10 ppBonus -0.0728 8.12 2.85 0.285
## 11 outcome2_1 -0.140 27.9 5.29 0.529
Simulated and theoretical means and standard errors are compared:
sdCols <- c("qual", "fold",
"win", "tie", "lose",
"antePay", "playPay", "anteBonus", "mainOutcome",
"ppBonus", "outcome2_1"
)
tcpTheoretical <- allTCPOutcome %>%
mutate(mainOutcome=(antePay+playPay+anteBonus),
outcome2_1=2*mainOutcome+ppBonus,
qual=dealerQual=="Q",
win=playerRaw=="W",
tie=playerRaw=="T",
lose=playerRaw=="L",
fold=action=="Fold"
) %>%
pivot_longer(cols=all_of(sdCols), names_to="metric", values_to="metricVal") %>%
group_by(metric) %>%
summarize(mu=sum(metricVal*n)/sum(n),
variance=sum(n*metricVal**2)/sum(n)-mu**2,
sd=sqrt(variance)
) %>%
mutate(ord=match(metric, sdCols)) %>%
arrange(ord) %>%
select(-ord)
tcpTheoretical
## # A tibble: 11 × 4
## metric mu variance sd
## <chr> <dbl> <dbl> <dbl>
## 1 qual 0.696 0.212 0.460
## 2 fold 0.326 0.220 0.469
## 3 win 0.499 0.250 0.500
## 4 tie 0.00111 0.00111 0.0332
## 5 lose 0.499 0.250 0.500
## 6 antePay -0.101 0.989 0.995
## 7 playPay 0.0146 0.463 0.681
## 8 anteBonus 0.0529 0.122 0.349
## 9 mainOutcome -0.0337 2.69 1.64
## 10 ppBonus -0.0728 8.12 2.85
## 11 outcome2_1 -0.140 27.9 5.29
tcpSimSummary <- allTCPSim %>%
mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
group_by(simNumber) %>%
mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
summarize(minOverall=min(ovCum),
sumOverall=sum(overall),
minMain=min(ovMain),
sumMain=sum(mainBet),
minBonus=min(ovBonus),
sumBonus=sum(ppBonus),
pQ=mean(dealerQual=="Q")
)
tcpSimSummary
## # A tibble: 10,000 × 8
## simNumber minOverall sumOverall minMain sumMain minBonus sumBonus pQ
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 -15 4 -5 6 -8 -8 0.67
## 2 2 -52 13 -11 -2 -32 17 0.71
## 3 3 -96 -90 -33 -31 -31 -28 0.74
## 4 4 -57 3 -16 4 -28 -5 0.66
## 5 5 -40 -32 -12 -8 -17 -16 0.66
## 6 6 -82 -66 -23 -20 -36 -26 0.77
## 7 7 -97 -83 -36 -25 -35 -33 0.75
## 8 8 -31 -30 -10 -9 -14 -12 0.66
## 9 9 -73 -73 -17 -17 -39 -39 0.65
## 10 10 -50 -50 -11 -11 -28 -28 0.68
## # ℹ 9,990 more rows
# Comparison of the main bet and bonus bet
tmpCompare <- tcpSimSummary %>%
select(simNumber, sumMain, sumBonus) %>%
colRenamer(vecRename=c("sumMain"="mainOutcome", "sumBonus"="ppBonus")) %>%
pivot_longer(cols=-c(simNumber)) %>%
group_by(name) %>%
summarize(simMean=mean(value), simSD=sd(value)) %>%
left_join(tcpTheoretical, by=c("name"="metric"))
tmpCompare
## # A tibble: 2 × 6
## name simMean simSD mu variance sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mainOutcome -3.28 16.5 -0.0337 2.69 1.64
## 2 ppBonus -7.12 28.8 -0.0728 8.12 2.85
# Comparison of actual and expected SD
tmpCompare %>%
mutate(sd100=sd*10) %>%
select(metric=name, sim=simSD, theory=sd100) %>%
pivot_longer(cols=-c(metric)) %>%
ggplot(aes(x=metric, y=value)) +
geom_point(aes(color=name)) +
coord_flip() +
labs(title="Theoretical and actual SD for 10,000 simulations of 100 hands",
y="Standard Deviation (SD)",
x=NULL
) +
scale_color_discrete(NULL) +
lims(y=c(0, NA))
# Comparison of actual and expected mean
tmpCompare %>%
mutate(mean100=mu*100,
theorySE=sqrt(100)*sd/sqrt(10000),
oneUp=mean100+theorySE,
oneDown=mean100-theorySE
) %>%
select(metric=name,
sim=simMean,
theoryMean=mean100,
theorySE,
oneUp,
oneDown
) %>%
ggplot(aes(x=metric)) +
geom_point(aes(y=sim), color="blue") +
geom_point(aes(y=theoryMean), color="green") +
geom_errorbar(aes(ymin=oneDown, ymax=oneUp), color="green") +
coord_flip() +
labs(title="Theoretical mean/SE (green) and actual mean (blue)",
subtitle="10,000 simulations of 100 hands",
y="Mean result of 100 hands (theoretical includes +/- 1 SE)",
x=NULL
)
Associations between main bet and bonus are explored:
# Verify the 2:1 ratio
lm(sumOverall ~ sumMain + sumBonus, data=tcpSimSummary) %>%
summary()
## Warning in summary.lm(.): essentially perfect fit: summary may be unreliable
##
## Call:
## lm(formula = sumOverall ~ sumMain + sumBonus, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.554e-12 -2.900e-15 1.100e-15 3.500e-15 1.779e-12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.821e-15 5.092e-16 1.340e+01 <2e-16 ***
## sumMain 2.000e+00 3.433e-17 5.825e+16 <2e-16 ***
## sumBonus 1.000e+00 1.967e-17 5.085e+16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.924e-14 on 9997 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 5.877e+33 on 2 and 9997 DF, p-value: < 2.2e-16
# Explore associations
tcpSimSummary %>%
select(-simNumber) %>%
cor() %>%
as.data.frame() %>%
rownames_to_column("var1") %>%
tibble::as_tibble() %>%
pivot_longer(cols=-c(var1), names_to="var2", values_to="corr") %>%
filter(var1<=var2) %>%
ggplot(aes(x=var1, y=var2)) +
geom_tile(aes(fill=corr)) +
geom_text(aes(label=round(corr, 3))) +
scale_fill_gradient2("Corr:", high="green") +
labs(title="Correlation of outcomes in 10,000 simulations, each of 100 hands",
y=NULL,
x=NULL
)
Relationships between pQ (percent dealer hands qualifying) and main/bonus bet outcomes are explored:
tcpSimSummary %>%
select(simNumber, pQ, sumMain, sumBonus) %>%
pivot_longer(cols=c(sumMain, sumBonus)) %>%
mutate(r_pQ=round(pQ, 2), r_value=round(5*value)/5) %>%
count(name, r_pQ, r_value) %>%
ggplot(aes(x=r_pQ, y=r_value)) +
geom_point(aes(size=n), alpha=0.1) +
geom_point(data=~summarize(group_by(., name),
across(c(r_pQ, r_value), .fns=function(x) sum(x*n)/sum(n))
),
color="red",
size=3
) +
geom_smooth(aes(weight=n), method="lm") +
facet_wrap(~name) +
labs(title="Main and bonus bet outcomes associated to dealer percent qualified",
subtitle="10,000 simulations, each of 100 hands",
caption="Red dot is simulation mean",
x="Percent dealer qualified", y="Outcome (rounded to nearest 5 units)"
) +
scale_size_continuous("# Sims")
## `geom_smooth()` using formula = 'y ~ x'
Predictive power is explored:
library(ranger) # needs to be called for predict.ranger later
rfTest <- ranger::ranger(sumMain ~ sumBonus + pQ, data=tcpSimSummary)
rfTest
## Ranger result
##
## Call:
## ranger::ranger(sumMain ~ sumBonus + pQ, data = tcpSimSummary)
##
## Type: Regression
## Number of trees: 500
## Sample size: 10000
## Number of independent variables: 2
## Mtry: 1
## Target node size: 5
## Variable importance mode: none
## Splitrule: variance
## OOB prediction error (MSE): 201.4635
## R squared (OOB): 0.2585433
lmTest <- lm(sumMain ~ sumBonus + pQ, data=tcpSimSummary)
summary(lmTest)
##
## Call:
## lm(formula = sumMain ~ sumBonus + pQ, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.934 -9.549 -0.282 9.491 49.724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.503314 2.121469 20.98 <2e-16 ***
## sumBonus 0.283669 0.004872 58.22 <2e-16 ***
## pQ -65.804555 3.042600 -21.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.02 on 9997 degrees of freedom
## Multiple R-squared: 0.2768, Adjusted R-squared: 0.2766
## F-statistic: 1913 on 2 and 9997 DF, p-value: < 2.2e-16
The simple linear model and the base random forest each explain ~25% of the variance in main bet outcomes, given bonus bet outcomes and percent dealer qualified
Performance on holdout data is explored:
set.seed(23111015)
idxTrain <- sample(1:nrow(tcpSimSummary),
size=round(0.7*nrow(tcpSimSummary)),
replace=FALSE
)
tcpSimTrain <- tcpSimSummary[idxTrain, ]
tcpSimTest <- tcpSimSummary[-idxTrain, ]
rfTest <- ranger::ranger(sumMain ~ sumBonus + pQ, data=tcpSimTrain)
rfTest
## Ranger result
##
## Call:
## ranger::ranger(sumMain ~ sumBonus + pQ, data = tcpSimTrain)
##
## Type: Regression
## Number of trees: 500
## Sample size: 7000
## Number of independent variables: 2
## Mtry: 1
## Target node size: 5
## Variable importance mode: none
## Splitrule: variance
## OOB prediction error (MSE): 204.4578
## R squared (OOB): 0.2506552
lmTest <- lm(sumMain ~ sumBonus + pQ, data=tcpSimTrain)
summary(lmTest)
##
## Call:
## lm(formula = sumMain ~ sumBonus + pQ, data = tcpSimTrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.921 -9.659 -0.295 9.591 49.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.830397 2.540661 16.86 <2e-16 ***
## sumBonus 0.283564 0.005825 48.68 <2e-16 ***
## pQ -63.656007 3.645259 -17.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.07 on 6997 degrees of freedom
## Multiple R-squared: 0.2747, Adjusted R-squared: 0.2745
## F-statistic: 1325 on 2 and 6997 DF, p-value: < 2.2e-16
# Apply to holdout data
tcpSimPlotData <- tcpSimTest %>%
mutate(lmPred=predict(lmTest, newdata=.),
rfPred=predict(rfTest, data=.)$predictions
) %>%
select(simNumber, sumMain, lmPred, rfPred) %>%
pivot_longer(cols=-c(simNumber, sumMain)) %>%
mutate(err=value-sumMain)
tcpSimPlotData
## # A tibble: 6,000 × 5
## simNumber sumMain name value err
## <int> <dbl> <chr> <dbl> <dbl>
## 1 2 -2 lmPred 2.46 4.46
## 2 2 -2 rfPred 5.25 7.25
## 3 9 -17 lmPred -9.60 7.40
## 4 9 -17 rfPred -9.43 7.57
## 5 14 -17 lmPred -10.4 6.64
## 6 14 -17 rfPred -23.1 -6.11
## 7 19 -14 lmPred -7.98 6.02
## 8 19 -14 rfPred -5.11 8.89
## 9 23 -13 lmPred -15.8 -2.83
## 10 23 -13 rfPred -17.9 -4.89
## # ℹ 5,990 more rows
# Get statistics
tcpSimPlotData %>%
group_by(name) %>%
summarize(n=n(),
err2Orig=mean((sumMain-mean(sumMain))**2),
err2Pred=mean(err**2),
r2=1-err2Pred/err2Orig
)
## # A tibble: 2 × 5
## name n err2Orig err2Pred r2
## <chr> <int> <dbl> <dbl> <dbl>
## 1 lmPred 3000 269. 193. 0.281
## 2 rfPred 3000 269. 201. 0.254
tcpSimPlotData %>%
ggplot(aes(y=sumMain, x=value)) +
geom_point(alpha=0.1) +
facet_wrap(~name) +
labs(title="Predictions on unseen data", x="Predicted Main", y="True Main") +
geom_smooth(method="lm") +
geom_abline(intercept=0, slope=1, lty=2, color="red")
## `geom_smooth()` using formula = 'y ~ x'
Both models perform as well on unseen data as on training data. The relationship known to have perfect predictive power is explored:
rfTest <- ranger::ranger(sumOverall ~ sumMain + sumBonus, data=tcpSimTrain)
rfTest
## Ranger result
##
## Call:
## ranger::ranger(sumOverall ~ sumMain + sumBonus, data = tcpSimTrain)
##
## Type: Regression
## Number of trees: 500
## Sample size: 7000
## Number of independent variables: 2
## Mtry: 1
## Target node size: 5
## Variable importance mode: none
## Splitrule: variance
## OOB prediction error (MSE): 3.762365
## R squared (OOB): 0.9986871
lmTest <- lm(sumOverall ~ sumMain + sumBonus, data=tcpSimTrain)
summary(lmTest)
##
## Call:
## lm(formula = sumOverall ~ sumMain + sumBonus, data = tcpSimTrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.423e-12 -9.000e-16 1.500e-15 4.500e-15 1.383e-12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.902e-14 1.475e-15 -1.290e+01 <2e-16 ***
## sumMain 2.000e+00 9.919e-17 2.016e+16 <2e-16 ***
## sumBonus 1.000e+00 5.675e-17 1.762e+16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.193e-13 on 6997 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 7.051e+32 on 2 and 6997 DF, p-value: < 2.2e-16
# Apply to holdout data
tcpSimPlotData <- tcpSimTest %>%
mutate(lmPred=predict(lmTest, newdata=.),
rfPred=predict(rfTest, data=.)$predictions
) %>%
select(simNumber, sumOverall, lmPred, rfPred) %>%
pivot_longer(cols=-c(simNumber, sumOverall)) %>%
mutate(err=value-sumOverall)
tcpSimPlotData
## # A tibble: 6,000 × 5
## simNumber sumOverall name value err
## <int> <dbl> <chr> <dbl> <dbl>
## 1 2 13 lmPred 13.0 6.04e-14
## 2 2 13 rfPred 12.5 -4.86e- 1
## 3 9 -73 lmPred -73.0 -1.42e-13
## 4 9 -73 rfPred -73.4 -4.37e- 1
## 5 14 -33 lmPred -33.0 3.55e-14
## 6 14 -33 rfPred -32.6 3.70e- 1
## 7 19 -68 lmPred -68.0 -1.56e-13
## 8 19 -68 rfPred -67.3 6.58e- 1
## 9 23 -69 lmPred -69.0 -1.71e-13
## 10 23 -69 rfPred -68.7 2.52e- 1
## # ℹ 5,990 more rows
# Get statistics
tcpSimPlotData %>%
group_by(name) %>%
summarize(n=n(),
err2Orig=mean((sumOverall-mean(sumOverall))**2),
err2Pred=mean(err**2),
r2=1-err2Pred/err2Orig
)
## # A tibble: 2 × 5
## name n err2Orig err2Pred r2
## <chr> <int> <dbl> <dbl> <dbl>
## 1 lmPred 3000 2813. 1.42e-26 1
## 2 rfPred 3000 2813. 4.71e+ 0 0.998
tcpSimPlotData %>%
ggplot(aes(y=sumOverall, x=value)) +
geom_point(alpha=0.1) +
facet_wrap(~name) +
labs(title="Predictions on unseen data", x="Predicted Overall", y="True Overall") +
geom_smooth(method="lm") +
geom_abline(intercept=0, slope=1, lty=2, color="red")
## `geom_smooth()` using formula = 'y ~ x'
Both models perform very well, though the linear model is slightly better at finding the exact linear relationship among the variables
ECDF are explored for simulation summaries:
percKey <- c(0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1)
tcpSimSummary %>%
select(minMain) %>%
arrange(minMain) %>%
mutate(pct=(row_number()-1)/(n()-1)) %>%
ggplot(aes(x=minMain, y=pct)) +
geom_text(data=~filter(., round(pct,4) %in% percKey),
aes(label=paste0(minMain, " (", round(1000*pct)/10, "%)"),
hjust=ifelse(pct %in% c(0, 1), 0, 1),
vjust=ifelse(pct==1, 1, 0)
),
color="red",
size=2.5
) +
geom_point(alpha=0.1, size=0.5) +
labs(title="ECDF of minimum position of main bet over 100 hands",
y=NULL,
x="Minimum position of main bet (100 hands)",
subtitle="10,000 simulations"
)
ECDF are updated to include final main bet summaries:
# Need better rounding for percentiles and selection
percKey <- c(0, 0.05, 0.25, 0.5001, 0.75, 0.95, 1)
tcpSimSummary %>%
select(simNumber, minMain, sumMain) %>%
pivot_longer(cols=-c(simNumber)) %>%
arrange(name, value) %>%
group_by(name) %>%
mutate(pct=(row_number()-1)/(n()-1)) %>%
ggplot(aes(x=value, y=pct)) +
geom_text(data=~filter(., round(pct,4) %in% percKey),
aes(label=paste0(value, " (", round(1000*pct)/10, "%)"),
hjust=ifelse(pct %in% c(0, 1), 0, 1),
vjust=ifelse(pct==1, 1, 0)
),
color="red",
size=2.5
) +
geom_point(alpha=0.1, size=0.5) +
facet_wrap(~name) +
labs(title="ECDF of main bet over 100 hands",
y=NULL,
x="Main bet (100 hands)",
subtitle="10,000 simulations"
)
The process is updated for better percentile matching:
percKey <- c(0, 0.05, 0.25, 0.5, 0.75, 0.95, 1)
# Function to get closest match in x to y
getBestMatch <- function(x, y, preferLess=TRUE) {
x <- sort(x)
op <- vector("numeric", length(y))
for(ctr in 1:length(y)) {
av <- abs(x-y[ctr])
wm <- which.min(av)
op[ctr] <- x[wm[ifelse(isTRUE(preferLess), 1, length(wm))]]
}
op
}
# Check pulling reasonable matches
getBestMatch((0:9999)/9999, percKey)
## [1] 0.000000 0.050005 0.250025 0.500050 0.749975 0.949995 1.000000
tcpSimSummary %>%
select(simNumber, minMain, sumMain) %>%
pivot_longer(cols=-c(simNumber)) %>%
arrange(name, value) %>%
group_by(name) %>%
mutate(pct=(row_number()-1)/(n()-1)) %>%
ggplot(aes(x=value, y=pct)) +
geom_text(data=~filter(., pct %in% getBestMatch(.$pct, percKey)),
aes(label=paste0(value, " (", round(1000*pct)/10, "%)"),
hjust=ifelse(pct %in% c(0, 1), 0, 1),
vjust=ifelse(pct==1, 1, 0)
),
color="red",
size=2.5
) +
geom_point(alpha=0.1, size=0.5) +
facet_wrap(~name) +
labs(title="ECDF of main bet over 100 hands",
y=NULL,
x="Main bet (100 hands)",
subtitle="10,000 simulations"
)
The ECDF process is converted to functional form:
getSimECDF <- function(df,
ecdfVars,
uqid="simNumber",
percPoint=c(),
labObj=labs()
) {
# FUNCTION ARGUMENTS:
# df: data frame containing simulation data
# ecdfVars: variables to use in creating ECDF
# uqid: unique id in simulations
# percPoint: percentiles to call out values on ECDF plot
# labObj: labelling object
p1 <- df %>%
select(all_of(c(uqid, ecdfVars))) %>%
pivot_longer(cols=-c(all_of(uqid))) %>%
arrange(name, value) %>%
group_by(name) %>%
mutate(pct=(row_number()-1)/(n()-1)) %>%
ggplot(aes(x=value, y=pct)) +
geom_text(data=~filter(., pct %in% getBestMatch(.$pct, percPoint)),
aes(label=paste0(value, " (", round(1000*pct)/10, "%)"),
hjust=ifelse(pct %in% c(0, 1), 0, 1),
vjust=ifelse(pct==1, 1, 0)
),
color="red",
size=2.5
) +
geom_point(alpha=0.1, size=0.5) +
labObj
if(length(ecdfVars)>1) p1 <- p1 + facet_wrap(~name, nrow=1)
p1
}
keyLabs <- labs(title="ECDF over 100 hands",
subtitle="10,000 simulations",
y=NULL,
x=NULL
)
# No percent-values plotted, no labels
getSimECDF(tcpSimSummary, ecdfVars=c("minMain"))
getSimECDF(tcpSimSummary, ecdfVars=c("minMain", "sumMain"))
# Percent-values plotted, standard labels
getSimECDF(tcpSimSummary, ecdfVars=c("minMain"), percPoint=percKey, labObj=keyLabs)
getSimECDF(tcpSimSummary,
ecdfVars=c("minMain", "sumMain"),
percPoint=percKey,
labObj=keyLabs
)
# Customized labels
cLab <- keyLabs
cLab$x <- "Main bet minimum (units)"
getSimECDF(tcpSimSummary,
ecdfVars=c("minMain"),
percPoint=percKey,
labObj=cLab
)
cLab$x <- "Main bet (units)"
getSimECDF(tcpSimSummary,
ecdfVars=c("minMain", "sumMain"),
percPoint=percKey,
labObj=cLab
)
The ECDF process is run for bonus bets:
# Customized labels
cLab$x <- "Bonus bet (units)"
getSimECDF(tcpSimSummary,
ecdfVars=c("minBonus", "sumBonus"),
percPoint=percKey,
labObj=cLab
)
The ECDF process is run for overall outcome:
# Customized labels
cLab$x <- "Overall (units)\n2*Main + Bonus"
getSimECDF(tcpSimSummary,
ecdfVars=c("minOverall", "sumOverall"),
percPoint=percKey,
labObj=cLab
)
Relationships between minimum and final are explored:
lmSim <- lm(sumOverall ~ minOverall, data=tcpSimSummary)
summary(lmSim)
##
## Call:
## lm(formula = sumOverall ~ minOverall, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.99 -21.27 -7.12 13.85 207.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.964429 0.532423 91.97 <2e-16 ***
## minOverall 1.381617 0.009622 143.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.51 on 9998 degrees of freedom
## Multiple R-squared: 0.6734, Adjusted R-squared: 0.6734
## F-statistic: 2.062e+04 on 1 and 9998 DF, p-value: < 2.2e-16
tcpSimSummary %>%
select(simNumber, minOverall, sumOverall) %>%
count(minOverall, sumOverall) %>%
ggplot(aes(x=minOverall, y=sumOverall)) +
geom_point(aes(size=n), alpha=0.1) +
labs(x="Overall minimum",
y="Overall final",
title="Final vs. minimum (overall)"
) +
geom_smooth(method="lm", aes(weight=n)) +
geom_abline(slope=1, intercept=0, color="red", lty=2) +
geom_text(data=~summarize(.,
minOverall=min(minOverall),
sumOverall=max(sumOverall)
),
aes(label=paste0("y = ",
round(coef(lmSim)[1], 1),
" + ",
round(coef(lmSim)[2], 3),
"*x\n",
"R2: ",
round(summary(lmSim)$r.squared, 3)
)
),
hjust=0,
vjust=1
)
## `geom_smooth()` using formula = 'y ~ x'
The process is converted to functional form:
finalVsMin <- function(df, y, x, txt="") {
# FUNCTION ARGUMENTS:
# df: data frame with simulations
# y: dependent variable
# x: independent variable
# txt: descriptor for chart labeling
tmpLM <- lm(get(y) ~ get(x), data=df)
summary(tmpLM)
p1 <- df %>%
group_by(across(all_of(c(y, x)))) %>%
summarize(n=n(), .groups="drop") %>%
ggplot(aes(x=.data[[x]], y=.data[[y]])) +
geom_point(aes(size=n), alpha=0.1) +
labs(x=paste0(txt, " Minimum"),
y=paste0(txt, " Final"),
title=paste0("Final vs. Minimum",
ifelse(txt=="", "", paste0(" (", txt, ")"))
)
) +
geom_smooth(method="lm", aes(weight=n)) +
geom_abline(slope=1, intercept=0, color="red", lty=2) +
geom_text(data=~summarize(.,
minX=min(get(x)),
maxY=max(get(y))
),
aes(x=minX,
y=maxY,
label=paste0("y = ",
round(coef(tmpLM)[1], 1),
" + ",
round(coef(tmpLM)[2], 3),
"*x\n",
"R2: ",
round(summary(tmpLM)$r.squared, 3)
)
),
hjust=0,
vjust=1
)
print(p1)
}
finalVsMin(tcpSimSummary, y="sumOverall", x="minOverall", txt="Overall")
## `geom_smooth()` using formula = 'y ~ x'
The process is run for the main bet and bonus bet:
finalVsMin(tcpSimSummary, y="sumMain", x="minMain", txt="Main")
## `geom_smooth()` using formula = 'y ~ x'
finalVsMin(tcpSimSummary, y="sumBonus", x="minBonus", txt="Bonus")
## `geom_smooth()` using formula = 'y ~ x'
The function is run for comparing end results:
finalVsMin(tcpSimSummary, y="sumOverall", x="sumMain", txt="Main")
## `geom_smooth()` using formula = 'y ~ x'
finalVsMin(tcpSimSummary, y="sumOverall", x="sumBonus", txt="Bonus")
## `geom_smooth()` using formula = 'y ~ x'
The function can be updated for better control of title and axis labels:
finalVsMin <- function(df, y, x, txt="", xTxt="Minimum", yTxt="Final") {
# FUNCTION ARGUMENTS:
# df: data frame with simulations
# y: dependent variable
# x: independent variable
# txt: descriptor for chart labeling
# xTxt: x-axis descriptor for chart labeling
# yTxt: y-axis descriptor for chart labeling
# Create text labels
titleTxt <- paste0(yTxt,
" vs. ",
xTxt,
ifelse(txt=="", "", paste0(" (", txt, ")"))
)
xTxt <- paste0(ifelse(txt=="", "", paste0(txt, " ")), xTxt)
yTxt <- paste0(ifelse(txt=="", "", paste0(txt, " ")), yTxt)
# Run linear regression
tmpLM <- lm(get(y) ~ get(x), data=df)
p1 <- df %>%
group_by(across(all_of(c(y, x)))) %>%
summarize(n=n(), .groups="drop") %>%
ggplot(aes(x=.data[[x]], y=.data[[y]])) +
geom_point(aes(size=n), alpha=0.1) +
labs(x=xTxt, y=yTxt, title=titleTxt) +
geom_smooth(method="lm", aes(weight=n)) +
geom_abline(slope=1, intercept=0, color="red", lty=2) +
geom_text(data=~summarize(.,
minX=min(get(x)),
maxY=max(get(y))
),
aes(x=minX,
y=maxY,
label=paste0("y = ",
round(coef(tmpLM)[1], 1),
" + ",
round(coef(tmpLM)[2], 3),
"*x\n",
"R2: ",
round(summary(tmpLM)$r.squared, 3)
)
),
hjust=0,
vjust=1
)
print(p1)
}
finalVsMin(tcpSimSummary, y="sumOverall", x="minOverall", txt="Overall")
## `geom_smooth()` using formula = 'y ~ x'
finalVsMin(tcpSimSummary,
y="sumOverall",
x="sumMain",
txt="Final",
xTxt="Main",
yTxt="Overall"
)
## `geom_smooth()` using formula = 'y ~ x'
finalVsMin(tcpSimSummary,
y="sumOverall",
x="sumBonus",
txt="Final",
xTxt="Bonus",
yTxt="Overall"
)
## `geom_smooth()` using formula = 'y ~ x'
The results of regressions are compared using bonus, given Overall = 2*Main + Bonus:
# Regression of main vs. bonus
lmMainBonus <- lm(sumMain ~ sumBonus, data=tcpSimSummary)
summary(lmMainBonus)
##
## Call:
## lm(formula = sumMain ~ sumBonus, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.894 -9.632 -0.176 9.637 51.953
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.272799 0.147762 -8.614 <2e-16 ***
## sumBonus 0.282307 0.004984 56.638 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.34 on 9998 degrees of freedom
## Multiple R-squared: 0.2429, Adjusted R-squared: 0.2428
## F-statistic: 3208 on 1 and 9998 DF, p-value: < 2.2e-16
# Expected coefficients for Overall ~ Bonus
cat("\nExpected coefficients for Overall ~ Bonus:\n",
"Intercept: ", round(2*coef(lmMainBonus)[1], 3),
"\nSlope: ", 1 + round(2*coef(lmMainBonus)[2], 3),
"\n",
sep=""
)
##
## Expected coefficients for Overall ~ Bonus:
## Intercept: -2.546
## Slope: 1.565
# Actual coefficients for Overall ~ Bonus
lmOverallBonus <- lm(sumOverall ~ sumBonus, data=tcpSimSummary)
cat("\nActual coefficients for Overall ~ Bonus:\n",
"Intercept: ", round(coef(lmOverallBonus)[1], 3),
"\nSlope: ", round(coef(lmOverallBonus)[2], 3),
"\nR-squared: ", round(summary(lmOverallBonus)$r.squared, 3),
"\n",
sep=""
)
##
## Actual coefficients for Overall ~ Bonus:
## Intercept: -2.546
## Slope: 1.565
## R-squared: 0.711
The results of regressions are compared using main, given Overall = 2*Main + Bonus:
# Regression of main vs. bonus
lmBonusMain <- lm(sumBonus ~ sumMain, data=tcpSimSummary)
summary(lmBonusMain)
##
## Call:
## lm(formula = sumBonus ~ sumMain, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.679 -17.796 -3.819 14.280 145.926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.29713 0.25533 -16.83 <2e-16 ***
## sumMain 0.86044 0.01519 56.64 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.04 on 9998 degrees of freedom
## Multiple R-squared: 0.2429, Adjusted R-squared: 0.2428
## F-statistic: 3208 on 1 and 9998 DF, p-value: < 2.2e-16
# Expected coefficients for Overall ~ Main
cat("\nExpected coefficients for Overall ~ Main:\n",
"Intercept: ", round(coef(lmBonusMain)[1], 3),
"\nSlope: ", 2 + round(coef(lmBonusMain)[2], 3),
"\n",
sep=""
)
##
## Expected coefficients for Overall ~ Main:
## Intercept: -4.297
## Slope: 2.86
# Actual coefficients for Overall ~ Main
lmOverallMain <- lm(sumOverall ~ sumMain, data=tcpSimSummary)
cat("\nActual coefficients for Overall ~ Main:\n",
"Intercept: ", round(coef(lmOverallMain)[1], 3),
"\nSlope: ", round(coef(lmOverallMain)[2], 3),
"\nR-squared: ", round(summary(lmOverallMain)$r.squared, 3),
"\n",
sep=""
)
##
## Actual coefficients for Overall ~ Main:
## Intercept: -4.297
## Slope: 2.86
## R-squared: 0.78
The results of regressions on minimum values are compared using bonus, given Overall = 2*Main + Bonus:
# Regression of main vs. bonus
lmMinMainBonus <- lm(minMain ~ minBonus, data=tcpSimSummary)
summary(lmMinMainBonus)
##
## Call:
## lm(formula = minMain ~ minBonus, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.770 -5.481 1.212 6.535 25.586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.788139 0.177251 -32.66 <2e-16 ***
## minBonus 0.355966 0.006591 54.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.244 on 9998 degrees of freedom
## Multiple R-squared: 0.2258, Adjusted R-squared: 0.2258
## F-statistic: 2917 on 1 and 9998 DF, p-value: < 2.2e-16
# (Rough) expected coefficients for Overall ~ Bonus
cat("\nExpected (rough) coefficients for minimum Overall ~ Bonus:\n",
"Intercept: ", round(2*coef(lmMinMainBonus)[1], 3),
"\nSlope: ", 1 + round(2*coef(lmMinMainBonus)[2], 3),
"\n",
sep=""
)
##
## Expected (rough) coefficients for minimum Overall ~ Bonus:
## Intercept: -11.576
## Slope: 1.712
# Actual coefficients for Overall ~ Bonus
lmMinOverallBonus <- lm(minOverall ~ minBonus, data=tcpSimSummary)
cat("\nActual coefficients for minimum Overall ~ Bonus:\n",
"Intercept: ", round(coef(lmMinOverallBonus)[1], 3),
"\nSlope: ", round(coef(lmMinOverallBonus)[2], 3),
"\nR-squared: ", round(summary(lmMinOverallBonus)$r.squared, 3),
"\n",
sep=""
)
##
## Actual coefficients for minimum Overall ~ Bonus:
## Intercept: -5.697
## Slope: 1.728
## R-squared: 0.584
# Actual regression for minima
lm(minOverall ~ minMain + minBonus, data=tcpSimSummary) %>%
summary()
##
## Call:
## lm(formula = minOverall ~ minMain + minBonus, data = tcpSimSummary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.514 -4.256 -1.866 2.426 41.657
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.535535 0.122050 53.55 <2e-16 ***
## minMain 2.113371 0.006546 322.84 <2e-16 ***
## minBonus 0.975744 0.004903 199.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.05 on 9997 degrees of freedom
## Multiple R-squared: 0.9636, Adjusted R-squared: 0.9636
## F-statistic: 1.323e+05 on 2 and 9997 DF, p-value: < 2.2e-16
Timing of first hitting the overall minimum and maximum by simulation is calculated:
# Pull first occurrence of minima and maxima
tcpSimWhich <- allTCPSim %>%
mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
group_by(simNumber) %>%
mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
summarize(across(c(ovCum, ovMain, ovBonus),
.fns=list(whichmin=function(x) which.min(x),
whichmax=function(x) which.max(x)
)
)
)
tcpSimWhich
## # A tibble: 10,000 × 7
## simNumber ovCum_whichmin ovCum_whichmax ovMain_whichmin ovMain_whichmax
## <int> <int> <int> <int> <int>
## 1 1 7 61 7 61
## 2 2 72 100 72 40
## 3 3 92 23 92 23
## 4 4 43 85 43 85
## 5 5 94 22 75 9
## 6 6 82 2 82 2
## 7 7 65 5 65 5
## 8 8 99 55 99 55
## 9 9 100 8 100 61
## 10 10 100 6 100 63
## # ℹ 9,990 more rows
## # ℹ 2 more variables: ovBonus_whichmin <int>, ovBonus_whichmax <int>
Distributions for hand where maximum is first achieved are plotted:
tcpSimWhich %>%
select(simNumber, ends_with("max")) %>%
pivot_longer(cols=-c(simNumber)) %>%
mutate(useName=str_replace_all(name, pattern="_whichmax", replacement="")) %>%
count(useName, value) %>%
ggplot(aes(x=value, y=n)) +
geom_line(aes(group=useName, color=useName)) +
labs(title="Hand number where overall maximum is first obtained",
subtitle="10,000 simulations, each of 100 hands",
y="# Simulations\n(log-10 scale)",
x="Hand where overall maximum is first achieved"
) +
scale_color_discrete("Bet Type") +
scale_y_log10() +
geom_text(data=~filter(., value %in% c(1, 100)),
aes(label=n, color=useName, hjust=ifelse(value==1, 1, 0)),
size=2.5
)
As expected given negative per-hand expectation, maximum is typically reached early in the simulation
Distributions for hand where minimum is first achieved are plotted:
tcpSimWhich %>%
select(simNumber, ends_with("min")) %>%
pivot_longer(cols=-c(simNumber)) %>%
mutate(useName=str_replace_all(name, pattern="_whichmin", replacement="")) %>%
count(useName, value) %>%
ggplot(aes(x=value, y=n)) +
geom_line(aes(group=useName, color=useName)) +
labs(title="Hand number where overall minimum is first obtained",
subtitle="10,000 simulations, each of 100 hands",
y="# Simulations\n(log-10 scale)",
x="Hand where overall minimum is first achieved"
) +
scale_color_discrete("Bet Type") +
scale_y_log10() +
geom_text(data=~filter(., value %in% c(1, 100)),
aes(label=n, color=useName, hjust=ifelse(value==1, 1, 0)),
size=2.5
)
As expected given negative per-hand expectation, minimum is typically reached late in the simulation
Time for reaching minimum and maximum is compared (overall):
tcpSimWhich %>%
select(simNumber, starts_with("ovCum")) %>%
pivot_longer(cols=-c(simNumber)) %>%
mutate(useName=str_replace_all(name, pattern="_which.*", replacement=""),
metric=str_replace_all(name, pattern=".*_which", replacement="")
) %>%
pivot_wider(id_cols=c("simNumber", "useName"),
names_from="metric",
values_from="value"
) %>%
count(useName, min, max) %>%
ggplot(aes(x=min, y=max)) +
geom_point(aes(size=n), alpha=0.25) +
labs(title="Hand number where overall minimum and maximum is first obtained",
subtitle="10,000 simulations, each of 100 hands",
y="Hand where overall maximum is first achieved",
x="Hand where overall minimum is first achieved",
caption="Red point is weighted mean"
) +
scale_size_continuous("# Sims") +
geom_smooth(method="lm", aes(weight=n)) +
geom_point(data=~summarize(., across(c(min, max),
.fns=function(x) sum(x*n)/sum(n)
)
),
size=5,
color="red"
)
## `geom_smooth()` using formula = 'y ~ x'
Time for reaching minimum and maximum is compared (all):
tcpSimWhich %>%
select(simNumber, starts_with("ov")) %>%
pivot_longer(cols=-c(simNumber)) %>%
mutate(useName=str_replace_all(name, pattern="_which.*", replacement=""),
metric=str_replace_all(name, pattern=".*_which", replacement="")
) %>%
pivot_wider(id_cols=c("simNumber", "useName"),
names_from="metric",
values_from="value"
) %>%
count(useName, min, max) %>%
ggplot(aes(x=min, y=max)) +
geom_point(aes(size=n), alpha=0.25) +
labs(title="Hand number where minimum and maximum is first obtained",
subtitle="10,000 simulations, each of 100 hands",
y="Hand where maximum is first achieved",
x="Hand where minimum is first achieved",
caption="Red point is weighted mean"
) +
scale_size_continuous("# Sims") +
geom_smooth(method="lm", aes(weight=n)) +
facet_wrap(~useName) +
geom_point(data=~summarize(group_by(., useName),
across(c(min, max), .fns=function(x) sum(x*n)/sum(n))
),
size=5,
color="red"
)
## `geom_smooth()` using formula = 'y ~ x'
Data for being at an all-time low/high is explored:
# Get new minima and maxima
tcpSimAllTime <- allTCPSim %>%
mutate(mainBet=(antePay+playPay+anteBonus), overall=2*mainBet+ppBonus) %>%
group_by(simNumber) %>%
mutate(ovCum=cumsum(overall), ovMain=cumsum(mainBet), ovBonus=cumsum(ppBonus)) %>%
select(simNumber, ovCum, ovMain, ovBonus) %>%
mutate(hn=row_number()) %>%
ungroup() %>%
pivot_longer(cols=-c(simNumber, hn)) %>%
arrange(simNumber, name, hn) %>%
group_by(simNumber, name) %>%
mutate(cmax=cummax(value),
cmin=cummin(value),
newmax=(cmax==value & (hn==1 | cmax>lag(cmax))),
newmin=(cmin==value & (hn==1 | cmin<lag(cmin))),
tiemax=(cmax==value) & !newmax,
tiemin=(cmax==value) & !newmin
) %>%
ungroup()
tcpSimAllTime
## # A tibble: 3,000,000 × 10
## simNumber hn name value cmax cmin newmax newmin tiemax tiemin
## <int> <int> <chr> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl>
## 1 1 1 ovBonus -1 -1 -1 TRUE TRUE FALSE FALSE
## 2 1 2 ovBonus -2 -1 -2 FALSE TRUE FALSE FALSE
## 3 1 3 ovBonus -3 -1 -3 FALSE TRUE FALSE FALSE
## 4 1 4 ovBonus -2 -1 -3 FALSE FALSE FALSE FALSE
## 5 1 5 ovBonus -3 -1 -3 FALSE FALSE FALSE FALSE
## 6 1 6 ovBonus -4 -1 -4 FALSE TRUE FALSE FALSE
## 7 1 7 ovBonus -5 -1 -5 FALSE TRUE FALSE FALSE
## 8 1 8 ovBonus -4 -1 -5 FALSE FALSE FALSE FALSE
## 9 1 9 ovBonus -5 -1 -5 FALSE FALSE FALSE FALSE
## 10 1 10 ovBonus -4 -1 -5 FALSE FALSE FALSE FALSE
## # ℹ 2,999,990 more rows
Probability for being at an all-time high for overall is plotted:
# Probability for being at all-time high overall
tcpSimAllTime %>%
filter(name=="ovCum") %>%
count(hn, newmax, tiemax) %>%
group_by(hn) %>%
mutate(pct=n/sum(n),
type=case_when(newmax~"allHigh", tiemax~"tieHigh", TRUE~"none")
) %>%
ungroup() %>%
pivot_wider(id_cols="hn", names_from="type", values_from="pct", values_fill=0) %>%
pivot_longer(cols=-hn) %>%
filter(hn>1) %>%
ggplot(aes(x=hn, y=value)) +
geom_text(data=~filter(., hn==100),
aes(label=round(value, 3)),
hjust=0,
size=2.5
) +
geom_line(aes(group=name,
color=c("allHigh"="3. New high",
"tieHigh"="2. Ties previous high",
"none"="1. Below previous high"
)[name]
)
) +
labs(x="Hand Number",
y="Proportion of simulations",
title="Likelihood of new overall maximum by hand number",
caption="Hand 1 not plotted (always a new high)"
) +
scale_color_discrete(NULL) +
theme(legend.position = "right")
Probability for being at an all-time low for overall is plotted:
# Probability for being at all-time low overall
tcpSimAllTime %>%
filter(name=="ovCum") %>%
count(hn, newmin, tiemin) %>%
group_by(hn) %>%
mutate(pct=n/sum(n),
type=case_when(newmin~"allLow", tiemin~"tieLow", TRUE~"none")
) %>%
ungroup() %>%
pivot_wider(id_cols="hn", names_from="type", values_from="pct", values_fill=0) %>%
pivot_longer(cols=-hn) %>%
filter(hn>1) %>%
ggplot(aes(x=hn, y=value)) +
geom_text(data=~filter(., hn==100),
aes(label=round(value, 3)),
hjust=0,
size=2.5
) +
geom_line(aes(group=name,
color=c("allLow"="3. New low",
"tieLow"="2. Ties previous low",
"none"="1. Above previous low"
)[name]
)
) +
labs(x="Hand Number",
y="Proportion of simulations",
title="Likelihood of new overall minimum by hand number",
caption="Hand 1 not plotted (always a new low)"
) +
scale_color_discrete(NULL) +
theme(legend.position = "right")
As expected with negative expectation, it is more common to reach new lows than new highs as the simulation progresses
Probability for being at an all-time high for all three metrics is plotted:
# Probability for being at all-time high
tcpSimAllTime %>%
count(name, hn, newmax, tiemax) %>%
group_by(name, hn) %>%
mutate(pct=n/sum(n),
type=case_when(newmax~"allHigh", tiemax~"tieHigh", TRUE~"none")
) %>%
ungroup() %>%
rename("metric"="name") %>%
pivot_wider(id_cols=c("metric", "hn"),
names_from="type",
values_from="pct",
values_fill=0
) %>%
pivot_longer(cols=-c(metric, hn)) %>%
filter(hn>1) %>%
ggplot(aes(x=hn, y=value)) +
geom_text(data=~filter(., hn==100),
aes(label=round(value, 3)),
hjust=0,
size=2.5
) +
geom_line(aes(group=name,
color=c("allHigh"="3. New high",
"tieHigh"="2. Ties previous high",
"none"="1. Below previous high"
)[name]
)
) +
labs(x="Hand Number",
y="Proportion of simulations",
title="Likelihood of new overall maximum by hand number",
caption="Hand 1 not plotted (always a new high)"
) +
scale_color_discrete(NULL) +
theme(legend.position = "right") +
facet_wrap(~metric) +
theme(legend.position="bottom")
Location for final hand probabilities is updated:
# Mapping of data label to plot label
cmapHigh <- c("allHigh"="3. New high",
"tieHigh"="2. Ties previous high",
"none"="1. Below previous high"
)
nmapMetric <- c("ovBonus"="3. Bonus", "ovCum"="1. Overall", "ovMain"="2. Main")
# Probability for being at all-time high
tcpSimAllTime %>%
count(name, hn, newmax, tiemax) %>%
group_by(name, hn) %>%
mutate(pct=n/sum(n),
type=case_when(newmax~"allHigh", tiemax~"tieHigh", TRUE~"none")
) %>%
ungroup() %>%
rename("metric"="name") %>%
pivot_wider(id_cols=c("metric", "hn"),
names_from="type",
values_from="pct",
values_fill=0
) %>%
pivot_longer(cols=-c(metric, hn)) %>%
filter(hn>1) %>%
ggplot(aes(x=hn, y=value)) +
geom_text(data=~filter(., hn==100),
aes(label=round(value, 3),
vjust=ifelse(name=="tieHigh", 1, 0),
color=cmapHigh[name]
),
hjust=1,
size=2.5
) +
geom_line(aes(group=name, color=cmapHigh[name])) +
labs(x="Hand Number",
y="Proportion of simulations",
title="Likelihood of new overall maximum by hand number",
caption="Hand 1 not plotted (always a new high)"
) +
scale_color_discrete(NULL) +
theme(legend.position = "right") +
facet_wrap(~nmapMetric[metric]) +
theme(legend.position="bottom")